download the page specified by each row

This commit is contained in:
Trowbridge 2019-12-18 16:52:11 -05:00
parent e7154b4975
commit 2887636034

View File

@@ -4,6 +4,8 @@
import psycopg2 import psycopg2
import wget import wget
import sys import sys
import requests
import urllib.request
con = None con = None
@@ -16,8 +18,14 @@ try:
cur.execute('SELECT code, url FROM scrape.raw') cur.execute('SELECT code, url FROM scrape.raw')
urls = cur.fetchall() urls = cur.fetchall()
headers = {
'User-Agent': "Mozilla/5.0 (X11; Linux x86_64)"
}
for row in urls: for row in urls:
print(row[0],row[1]) print(row[0],row[1])
#cont = wget.download(row[1])
html = requests.request("GET",row[1],headers=headers)
print(html.text)
except psycopg2.DatabaseError as e: except psycopg2.DatabaseError as e: