Added Python 3 script to download catalogues from Brickset
This commit is contained in:
parent
def111ed6a
commit
8c31492779
29
catalog.py
Normal file
29
catalog.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import requests
|
||||
import urllib.request
|
||||
import time
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import os
|
||||
|
||||
# Scrape the Brickset catalogue library page and download every catalogue
# PDF it links to into ../Catalogues/, skipping files that already exist.
url = 'https://brickset.com/library/catalogues'
request_timeout = 30  # seconds; requests waits forever when no timeout is given
chunk_size = 64 * 1024  # bytes written per chunk while streaming a PDF to disk

response = requests.get(url, timeout=request_timeout)
# Fail fast rather than parsing an HTTP error page as if it were the listing.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
line_count = 1  # 1-based number of the catalogue entry being processed

# Collect the paragraphs once instead of re-scanning the document for every
# heading inside the loop.
paragraphs = soup.find_all('p')

for idx, val in enumerate(soup.find_all('h3')):
    if idx == 0:
        # The first <h3> is skipped (presumably a page heading, not a year).
        continue
    if idx >= len(paragraphs):
        # The script pairs the idx-th <h3> with the idx-th <p>; stop when the
        # page has fewer paragraphs than headings.
        break

    paragraph = paragraphs[idx]
    year = val.string  # None when the heading contains nested markup
    try:
        title = paragraph.contents[0]
        link = paragraph.contents[3].get('href')
    except (IndexError, AttributeError):
        # The paragraph does not have the expected "title ... <a href>" shape.
        link = None
    if year is None or not link:
        print("skipping entry", idx, "- unexpected page layout")
        continue

    print(link, " ", title, " ", year)

    # File name format is unchanged so previously downloaded files still match.
    dpath = "../Catalogues/" + year + "-" + title.strip().replace(' ', '_').replace('/', '-') + ".pdf"
    print(dpath)

    if not os.path.exists(dpath):
        os.makedirs(os.path.dirname(dpath), exist_ok=True)
        # Download into a temporary name and rename only on success, so an
        # interrupted transfer never leaves a truncated PDF that the
        # existence check above would skip on the next run.
        part_path = dpath + ".part"
        try:
            with requests.get(link, stream=True, timeout=request_timeout) as r:
                r.raise_for_status()
                with open(part_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=chunk_size):
                        f.write(chunk)
        except requests.RequestException as err:
            print("download failed:", link, err)
            if os.path.exists(part_path):
                os.remove(part_path)
        else:
            os.replace(part_path, dpath)
        time.sleep(1)  # pause between downloads to be polite to the server

    line_count += 1
|
Loading…
Reference in New Issue
Block a user