Frederik Baerentsen
4 years ago
1 changed files with 29 additions and 0 deletions
@ -0,0 +1,29 @@ |
|||
import requests |
|||
import urllib.request |
|||
import time |
|||
from bs4 import BeautifulSoup |
|||
import re |
|||
import os |
|||
|
|||
# Download every catalogue PDF linked from the Brickset catalogue library
# page into OUTPUT_DIR, skipping files that already exist locally.

url = 'https://brickset.com/library/catalogues'

OUTPUT_DIR = "../Catalogues/"
REQUEST_TIMEOUT = 30  # seconds; requests waits forever when no timeout is given
DOWNLOAD_PAUSE = 1  # seconds to pause after each download attempt
CHUNK_SIZE = 65536  # bytes written per chunk while streaming a PDF to disk


def _parse_entry(heading, paragraph):
    """Return ``(year, title, link)`` for one catalogue entry, or ``None``.

    The year is the heading's text, the title is the paragraph's first child
    and the link is the ``href`` of the paragraph's fourth child.  ``None`` is
    returned when the markup does not have that shape, so the caller can skip
    the entry instead of crashing on an IndexError/AttributeError/TypeError.
    """
    year = heading.string  # None when the heading holds more than one child
    children = paragraph.contents
    if year is None or len(children) < 4:
        return None

    title = children[0]
    if not isinstance(title, str):  # a nested tag instead of plain text
        return None

    href_getter = getattr(children[3], 'get', None)
    link = href_getter('href') if callable(href_getter) else None
    if not link:
        return None
    return year, title, link


def _catalogue_path(year, title):
    """Return the local PDF path for a catalogue: ``<dir><year>-<title>.pdf``.

    Spaces in the title become underscores and slashes become dashes so the
    title cannot introduce extra path components.
    """
    safe_title = title.strip().replace(' ', '_').replace('/', '-')
    return OUTPUT_DIR + year + "-" + safe_title + ".pdf"


def _download(link, dpath):
    """Stream ``link`` to ``dpath``.

    The body is written to ``dpath + ".part"`` and only renamed to ``dpath``
    once complete, so an interrupted transfer never leaves a truncated file
    that the existence check would treat as already downloaded.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status.
        OSError: if the file cannot be written or renamed.
    """
    partial_path = dpath + ".part"
    with requests.get(link, stream=True, timeout=REQUEST_TIMEOUT) as r:
        r.raise_for_status()  # do not save an HTML error page as a .pdf
        with open(partial_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                f.write(chunk)
    os.replace(partial_path, dpath)


response = requests.get(url, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

os.makedirs(OUTPUT_DIR, exist_ok=True)

headings = soup.find_all('h3')
paragraphs = soup.find_all('p')  # looked up once, not twice per heading

# The first <h3> is skipped; every later <h3> is paired with the <p> at the
# same index.  NOTE(review): this pairing mirrors the original script and
# depends on the page layout -- confirm against the live page.
for idx, heading in enumerate(headings):
    if idx == 0:
        continue
    if idx >= len(paragraphs):
        print("No paragraph for heading", idx, "- stopping")
        break

    entry = _parse_entry(heading, paragraphs[idx])
    if entry is None:
        print("Skipping entry", idx, "- unexpected page structure")
        continue

    year, title, link = entry
    print(link, " ", title, " ", year)

    dpath = _catalogue_path(year, title)
    print(dpath)

    if not os.path.exists(dpath):
        try:
            _download(link, dpath)
        except (requests.RequestException, OSError) as err:
            # One failed catalogue should not abort the remaining downloads.
            print("Download failed for", link, "-", err)
        time.sleep(DOWNLOAD_PAUSE)  # pause the code for a sec
Loading…
Reference in new issue