"""Download the catalogue PDFs linked from the Brickset catalogue library page.

The script fetches the index page, pairs the n-th <h3> heading (used as the
year) with the n-th <p> element (title text plus download link), and saves
each linked file to ``../Catalogues/<year>-<title>.pdf``.  Files that already
exist are skipped, so the script can be re-run to resume.
"""
import os
import re
import time
import urllib.request
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

OUTPUT_DIR = "../Catalogues/"
REQUEST_TIMEOUT = 30      # seconds; without it a stalled server hangs the run
CHUNK_SIZE = 64 * 1024    # bytes written per chunk while streaming a download

url = 'https://brickset.com/library/catalogues'
response = requests.get(url, timeout=REQUEST_TIMEOUT)
# Fail loudly instead of parsing an HTTP error page as if it were the index.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# open(..., 'wb') does not create missing directories.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Parse both element lists once; re-running find_all('p') inside the loop
# re-walks the whole document for every catalogue.
headings = soup.find_all('h3')
paragraphs = soup.find_all('p')

for idx, val in enumerate(headings):
    # The first <h3> is skipped, as in the original script
    # (presumably a page header rather than a catalogue entry -- TODO confirm).
    if idx == 0:
        continue

    # Assumes heading idx and paragraph idx describe the same catalogue, with
    # the title text as child 0 and the link element as child 3 of the <p>.
    try:
        year = val.string
        title = paragraphs[idx].contents[0]
        href = paragraphs[idx].contents[3].get('href')
    except (IndexError, AttributeError):
        print("Skipping entry", idx, "- unexpected page layout")
        continue
    if year is None or href is None:
        print("Skipping entry", idx, "- missing year or link")
        continue

    # Resolve relative hrefs against the index page; absolute URLs pass through.
    link = urljoin(url, href)
    print(link," ",title," ",year)

    # Spaces and slashes in the title are replaced so it forms one file name.
    dpath = OUTPUT_DIR+year+"-"+title.strip().replace(' ', '_').replace('/', '-')+".pdf"
    print(dpath)
    if os.path.exists(dpath):
        continue

    # Download to a temporary name and rename only on success, so an
    # interrupted or failed transfer never leaves a truncated/garbage file
    # that later runs would mistake for a finished download.
    tmp_path = dpath + ".part"
    try:
        with requests.get(link, stream=True, timeout=REQUEST_TIMEOUT) as r:
            # Do not save an HTML error page under a .pdf name.
            r.raise_for_status()
            with open(tmp_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
        os.replace(tmp_path, dpath)
    except requests.RequestException as exc:
        # One failed catalogue should not abort the remaining downloads.
        print("Download failed for", link, "-", exc)
    finally:
        # After a successful os.replace the temp file no longer exists.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    time.sleep(1)  # pause between downloads to avoid hammering the server