Added Python3 script to download catalogues from brickset

This commit is contained in:
Frederik Baerentsen 2020-06-24 13:06:31 +02:00
parent def111ed6a
commit 8c31492779

29
catalog.py Normal file
View File

@ -0,0 +1,29 @@
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
import re
import os
# Index page that lists the catalogues, one <h3> heading per entry.
CATALOGUE_URL = 'https://brickset.com/library/catalogues'
# Directory (relative to the working directory) the PDFs are saved into.
CATALOGUE_DIR = "../Catalogues"
# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 60
# Seconds to pause after each download attempt, to go easy on the server.
DOWNLOAD_DELAY = 1


def build_catalogue_path(year, title):
    """Return the local PDF path for a catalogue.

    The title is stripped of surrounding whitespace, spaces become
    underscores and slashes become dashes so the result is a single
    file name inside ``CATALOGUE_DIR``.

    >>> build_catalogue_path("1990", " Town / Space ")
    '../Catalogues/1990-Town_-_Space.pdf'
    """
    safe_title = title.strip().replace(' ', '_').replace('/', '-')
    return CATALOGUE_DIR + "/" + year + "-" + safe_title + ".pdf"


def download_catalogue(link, dpath):
    """Download *link* to *dpath*; return True on success, False otherwise.

    The body is streamed into ``dpath + ".part"`` and only renamed to
    *dpath* once it has been fully written, so an interrupted or failed
    download never leaves a file that a later run would mistake for a
    complete catalogue.
    """
    part_path = dpath + ".part"
    try:
        with requests.get(link, stream=True, timeout=REQUEST_TIMEOUT) as r:
            # Without this an HTTP error page would be saved as the "PDF".
            r.raise_for_status()
            with open(part_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=65536):
                    f.write(chunk)
    except (requests.RequestException, OSError) as err:
        print("Failed to download", link, ":", err)
        if os.path.exists(part_path):
            os.remove(part_path)
        return False
    os.replace(part_path, dpath)
    return True


def main():
    """Fetch the catalogue index and download every catalogue not yet saved.

    Raises:
        requests.RequestException: if the index page itself cannot be
            fetched (individual catalogue failures are reported and skipped).
    """
    response = requests.get(CATALOGUE_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Parsed once: the n-th <p> is taken to describe the n-th <h3>.
    paragraphs = soup.find_all('p')
    os.makedirs(CATALOGUE_DIR, exist_ok=True)

    for idx, heading in enumerate(soup.find_all('h3')):
        # The first <h3> is skipped -- presumably not a catalogue entry;
        # verify against the live page if the layout changes.
        if idx == 0:
            continue

        year = heading.string
        try:
            entry = paragraphs[idx].contents
            title = entry[0]
            link = entry[3].get('href')
        except (IndexError, AttributeError):
            print("Skipping entry", idx, ": unexpected page layout")
            continue
        # .string is None for headings with nested markup, and the first
        # child of the paragraph may be a tag rather than text.
        if not isinstance(year, str) or not isinstance(title, str) or not link:
            print("Skipping entry", idx, ": missing year, title or link")
            continue

        print(link, " ", title, " ", year)
        dpath = build_catalogue_path(year, title)
        print(dpath)

        if os.path.exists(dpath):
            continue
        download_catalogue(link, dpath)
        time.sleep(DOWNLOAD_DELAY)  # pause between requests


if __name__ == "__main__":
    main()