Added Python3 script to download catalogues from brickset

2020-06-24 13:06:31 +02:00 · 2020-06-24 13:06:31 +02:00 · 8c31492779
commit 8c31492779
parent def111ed6a
1 changed files with 29 additions and 0 deletions
--- a/catalog.py
+++ b/catalog.py
@@ -0,0 +1,29 @@
+import requests 
+import urllib.request 
+import time 
+from bs4 import BeautifulSoup
+import re
+import os
+
+# Fetch the Brickset catalogue index and save each linked file as ../Catalogues/<year>-<title>.pdf, skipping existing files.
+url = 'https://brickset.com/library/catalogues'
+response = requests.get(url, timeout=30)  # a timeout keeps the script from hanging forever
+response.raise_for_status()  # do not parse an HTTP error page as the index
+soup = BeautifulSoup(response.text, "html.parser")
+paragraphs = soup.find_all('p')  # looked up once instead of twice per heading
+
+# Pairs the idx-th <h3> (year) with the idx-th <p> (title, link); assumes the page keeps them aligned.
+for idx, val in enumerate(soup.find_all('h3')):
+    if idx != 0:  # the first heading/paragraph pair is skipped
+        year = val.string
+        title = paragraphs[idx].contents[0]
+        link = paragraphs[idx].contents[3].get('href')
+        print(link, " ", title, " ", year)
+        dpath = "../Catalogues/"+year+"-"+title.strip().replace(' ', '_').replace('/', '-')+".pdf"
+        print(dpath)
+        if not os.path.exists(dpath):
+            r = requests.get(link, timeout=60)
+            r.raise_for_status()  # never store an HTTP error body as a .pdf
+            with open(dpath, 'wb') as f:
+                f.write(r.content)
+            time.sleep(1)  # throttle only when a request was actually made