Instructions downloader #54

Merged
FrederikBaerentsen merged 19 commits from instructions into master 2025-01-26 19:17:42 +01:00
2 changed files with 27 additions and 25 deletions
Showing only changes of commit 4a785df532 - Show all commits

View File

@ -9,6 +9,7 @@ from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
import requests import requests
from bs4 import BeautifulSoup
from .exceptions import ErrorException from .exceptions import ErrorException
if TYPE_CHECKING: if TYPE_CHECKING:
@ -115,11 +116,34 @@ class BrickInstructions(object):
file=self.filename file=self.filename
)) ))
def find_instructions(self, set_id: str, /) -> 'list[tuple[str, str]]':
    """Find the building instructions listed on Rebrickable for a set.

    Fetches ``https://rebrickable.com/instructions/<set_id>`` and scans
    the returned HTML for links wrapping an image whose ``alt`` text
    contains "LEGO Building Instructions".

    Args:
        set_id: Set identifier, interpolated as-is into the URL.

    Returns:
        A list of ``(name, href)`` tuples in page order. ``name`` is the
        image ``alt`` text with the "LEGO Building Instructions for "
        prefix removed; ``href`` is the link target exactly as it appears
        in the page. The list is empty when nothing matches.

    Raises:
        ErrorException: If the page does not answer with HTTP 200.
    """
    url = f"https://rebrickable.com/instructions/{set_id}"

    # Browser-like User-Agent.
    # NOTE(review): presumably the site rejects the default requests
    # User-Agent -- confirm before changing or removing this header.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'  # noqa: E501
    }

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        # Must be an f-string: a plain string would show the literal
        # "{response.status_code}" placeholder to the user.
        raise ErrorException(
            f'Failed to load page. Status code: {response.status_code}'
        )

    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Collect every <a href> wrapping an <img> whose alt text mentions
    # "LEGO Building Instructions"
    found_tags = []
    for a_tag in soup.find_all('a', href=True):
        img_tag = a_tag.find('img', alt=True)
        if img_tag and "LEGO Building Instructions" in img_tag['alt']:
            found_tags.append((
                img_tag['alt'].replace('LEGO Building Instructions for ', ''),
                a_tag['href'],
            ))

    return found_tags
def download(self, href: str, /) -> None: def download(self, href: str, /) -> None:
target = self.path(secure_filename(self.filename)) target = self.path(secure_filename(self.filename))
if os.path.isfile(target): if os.path.isfile(target):
raise ErrorException('Cannot upload {target} as it already exists'.format( # noqa: E501 raise ErrorException('Cannot download {target} as it already exists'.format( # noqa: E501
target=self.filename target=self.filename
)) ))
@ -130,9 +154,8 @@ class BrickInstructions(object):
# Save the file # Save the file
with open(target, 'wb') as file: with open(target, 'wb') as file:
file.write(response.content) file.write(response.content)
print(f"Downloaded {self.filename} to {target}")
else: else:
print(f"Failed to download {self.filename}. Status code: {response.status_code}", 'danger') raise ErrorException(f"Failed to download {self.filename}. Status code: {response.status_code}")
# Info # Info

View File

@ -16,9 +16,6 @@ from ..instructions import BrickInstructions
from ..instructions_list import BrickInstructionsList from ..instructions_list import BrickInstructionsList
from .upload import upload_helper from .upload import upload_helper
import requests
from bs4 import BeautifulSoup
instructions_page = Blueprint( instructions_page = Blueprint(
'instructions', 'instructions',
__name__, __name__,
@ -150,25 +147,7 @@ def download() -> str:
def do_download() -> Response: def do_download() -> Response:
set_id: str = request.form.get('add-set', '') set_id: str = request.form.get('add-set', '')
url = f"https://rebrickable.com/instructions/{set_id}" found_tags = BrickInstructions(set_id).find_instructions(set_id)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
response = requests.get(url, headers=headers)
if response.status_code != 200:
flash(f"Failed to load page. Status code: {response.status_code}", 'danger')
return redirect(url_for('instructions.download'))
# Parse the HTML content
soup = BeautifulSoup(response.content, 'html.parser')
# Collect all <img> tags with "LEGO Building Instructions" in the alt attribute
found_tags = []
for a_tag in soup.find_all('a', href=True):
img_tag = a_tag.find('img', alt=True)
if img_tag and "LEGO Building Instructions" in img_tag['alt']:
found_tags.append((img_tag['alt'].replace('LEGO Building Instructions for ', ''), a_tag['href'])) # Save alt and href
return render_template('instructions.html', download=True, found_tags=found_tags) return render_template('instructions.html', download=True, found_tags=found_tags)