Download the page at the URL stored in each row
This commit is contained in:
parent
e7154b4975
commit
2887636034
@ -4,6 +4,8 @@
|
|||||||
import psycopg2
|
import psycopg2
|
||||||
import wget
|
import wget
|
||||||
import sys
|
import sys
|
||||||
|
import requests
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
con = None
|
con = None
|
||||||
|
|
||||||
@ -16,8 +18,14 @@ try:
|
|||||||
cur.execute('SELECT code, url FROM scrape.raw')
|
cur.execute('SELECT code, url FROM scrape.raw')
|
||||||
|
|
||||||
urls = cur.fetchall()
|
urls = cur.fetchall()
|
||||||
|
headers = {
|
||||||
|
'User-Agent': "Mozilla/5.0 (X11; Linux x86_64)"
|
||||||
|
}
|
||||||
for row in urls:
|
for row in urls:
|
||||||
print(row[0],row[1])
|
print(row[0],row[1])
|
||||||
|
#cont = wget.download(row[1])
|
||||||
|
html = requests.request("GET",row[1],headers=headers)
|
||||||
|
print(html.text)
|
||||||
|
|
||||||
except psycopg2.DatabaseError as e:
|
except psycopg2.DatabaseError as e:
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user