DataHoarder_scripts/catalog.py

import requests 
import urllib.request 
import time 
from bs4 import BeautifulSoup
import re
import os

# Download every catalogue PDF linked from the Brickset catalogue index into
# ../Catalogues/, naming each file "<year>-<title>.pdf". Files that already
# exist on disk are skipped, so the script can be re-run to resume.
url = 'https://brickset.com/library/catalogues'


# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from scraping an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
line_count = 1 #variable to track what line you are on

# Parse the paragraph list once; re-running find_all('p') for every heading
# made the loop quadratic in the size of the page.
paragraphs = soup.find_all('p')

# open() does not create missing directories, so make sure the target exists.
out_dir = "../Catalogues/"
os.makedirs(out_dir, exist_ok=True)

for idx, val in enumerate(soup.find_all('h3')):
    # The first <h3> is skipped, exactly as before; the idx-th <h3> is paired
    # with the idx-th <p> on the page.
    if idx == 0:
        continue

    # get_text() equals .string for a plain heading but, unlike .string,
    # never returns None when the heading contains nested tags.
    year = val.get_text()
    try:
        entry = paragraphs[idx].contents
        title = str(entry[0])
        link = entry[3].get('href')
    except (IndexError, AttributeError):
        # Markup did not have the expected "<title> ... <a href>" layout.
        print("skipping entry with unexpected markup at index", idx)
        continue
    if not link:
        print("skipping entry without a link at index", idx)
        continue
    print(link," ",title," ",year)

    dpath = out_dir+year+"-"+title.strip().replace(' ', '_').replace('/', '-')+".pdf"
    print(dpath)
    if not os.path.exists(dpath):
        # Download to a temporary name and rename on success, so that an
        # interrupted run never leaves a truncated file that a later run
        # would mistake for a finished download.
        tmp_path = dpath + ".part"
        try:
            with requests.get(link, stream=True, timeout=60) as r:
                # Do not save a 404/500 error page under a .pdf name.
                r.raise_for_status()
                with open(tmp_path, 'wb') as f:
                    # Stream in chunks instead of loading the whole PDF
                    # into memory via r.content.
                    for chunk in r.iter_content(chunk_size=65536):
                        f.write(chunk)
            os.replace(tmp_path, dpath)
        except requests.RequestException as exc:
            # One failed catalogue should not abort the remaining downloads.
            print("download failed:", link, exc)
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        # Be polite to the server: pause only after actually issuing a request.
        time.sleep(1)
    line_count +=1
Added Python3 script to download catalogues from Brickset (2020-06-24 13:06:31 +02:00)
			`import requests`
			`import urllib.request`
			`import time`
			`from bs4 import BeautifulSoup`
			`import re`
			`import os`

			`url = 'https://brickset.com/library/catalogues'`


			`response = requests.get(url)`
			`soup = BeautifulSoup(response.text, "html.parser")`
			`line_count = 1 #variable to track what line you are on`

			`for idx, val in enumerate(soup.findAll('h3')):`
			`if idx != 0:`
			`year=val.string`
			`title=soup.find_all('p')[idx].contents[0]`
			`link=soup.find_all('p')[idx].contents[3].get('href')`
			`print(link," ",title," ",year)`

			`dpath="../Catalogues/"+year+"-"+title.strip().replace(' ', '_').replace('/', '-')+".pdf"`
			`print(dpath)`
			`if not os.path.exists(dpath):`
			`r = requests.get(link, stream=True)`
			`with open(dpath, 'wb') as f:`
			`f.write(r.content)`
			`time.sleep(1) #pause the code for a sec`
			`line_count +=1`