From eddf4311d31d84b812519cffc3efcb49644e014f Mon Sep 17 00:00:00 2001 From: Frederik Baerentsen Date: Tue, 23 Sep 2025 17:30:30 +0200 Subject: [PATCH 1/8] Feat(peeron): Initial upload --- bricktracker/peeron_instructions.py | 197 ++++++++++++++++ bricktracker/peeron_pdf.py | 269 ++++++++++++++++++++++ bricktracker/views/instructions.py | 48 +++- templates/instructions/peeron_socket.html | 148 ++++++++++++ templates/peeron_select.html | 88 +++++++ 5 files changed, 741 insertions(+), 9 deletions(-) create mode 100644 bricktracker/peeron_instructions.py create mode 100644 bricktracker/peeron_pdf.py create mode 100644 templates/instructions/peeron_socket.html create mode 100644 templates/peeron_select.html diff --git a/bricktracker/peeron_instructions.py b/bricktracker/peeron_instructions.py new file mode 100644 index 0000000..2a3e302 --- /dev/null +++ b/bricktracker/peeron_instructions.py @@ -0,0 +1,197 @@ +import logging +from typing import Any, NamedTuple, TYPE_CHECKING +from urllib.parse import urljoin + +from bs4 import BeautifulSoup +import cloudscraper +from flask import current_app +import requests + +from .exceptions import ErrorException +if TYPE_CHECKING: + from .socket import BrickSocket + +logger = logging.getLogger(__name__) + + +def get_peeron_user_agent(): + """Get the User-Agent string for Peeron requests from config""" + return current_app.config.get('REBRICKABLE_USER_AGENT', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36') + + +def get_peeron_download_delay(): + """Get the delay in milliseconds between Peeron page downloads from config""" + return current_app.config.get('PEERON_DOWNLOAD_DELAY', 1000) + + +def get_min_image_size(): + """Get the minimum image size for valid Peeron instruction pages from config""" + return current_app.config.get('PEERON_MIN_IMAGE_SIZE', 100) + + +def get_peeron_instruction_url(set_number: str, version_number: str): + """Get the Peeron instruction page URL using the configured pattern""" + pattern = current_app.config.get('PEERON_INSTRUCTION_PATTERN', 'http://peeron.com/scans/{set_number}-{version_number}') + return pattern.format(set_number=set_number, version_number=version_number) + + +def get_peeron_thumbnail_url(set_number: str, version_number: str): + """Get the Peeron thumbnail base URL using the configured pattern""" + pattern = current_app.config.get('PEERON_THUMBNAIL_PATTERN', 'http://belay.peeron.com/thumbs/{set_number}-{version_number}/') + return pattern.format(set_number=set_number, version_number=version_number) + + +def get_peeron_scan_url(set_number: str, version_number: str): + """Get the Peeron scan base URL using the configured pattern""" + pattern = current_app.config.get('PEERON_SCAN_PATTERN', 'http://belay.peeron.com/scans/{set_number}-{version_number}/') + return pattern.format(set_number=set_number, version_number=version_number) + + +def create_peeron_scraper(): + """Create a cloudscraper instance configured for Peeron""" + scraper = cloudscraper.create_scraper() + scraper.headers.update({ + "User-Agent": get_peeron_user_agent() + }) + return scraper + + +class PeeronPage(NamedTuple): + """Represents a single instruction page from Peeron""" + page_number: str + thumbnail_url: str + image_url: str + alt_text: str + + +# Peeron instruction scraper +class PeeronInstructions(object): + socket: 'BrickSocket | None' + set_number: str + version_number: str + pages: list[PeeronPage] + + def __init__( + self, + set_number: str, + version_number: str = '1', + /, + *, + socket: 'BrickSocket | None' = None, + ): + # Save the socket + self.socket = socket + + # Parse set number (handle both "4011" and "4011-1" formats) + if '-' in set_number: + parts = set_number.split('-', 1) + self.set_number = parts[0] + self.version_number = parts[1] if len(parts) > 1 else '1' + else: + self.set_number = set_number + self.version_number = version_number + + # Placeholder for pages + self.pages = [] + + # Check if instructions exist on Peeron + def exists(self, /) -> bool: + """Check if the set exists on Peeron without downloading pages""" + try: + pages = self.find_pages() + return len(pages) > 0 + except ErrorException: + return False + + # Find all available instruction pages on Peeron + def find_pages(self, /) -> list[PeeronPage]: + """ + Scrape Peeron's HTML and return a list of available instruction pages. + Similar to BrickInstructions.find_instructions() but for Peeron. + """ + base_url = get_peeron_instruction_url(self.set_number, self.version_number) + thumb_base_url = get_peeron_thumbnail_url(self.set_number, self.version_number) + scan_base_url = get_peeron_scan_url(self.set_number, self.version_number) + + logger.debug(f"[find_pages] fetching HTML from {base_url!r}") + + # Set up cloudscraper with cookies enabled for Peeron + scraper = create_peeron_scraper() + + # Download the main HTML page + try: + response = scraper.get(base_url) + if response.status_code != 200: + raise ErrorException(f'Failed to load Peeron page for {self.set_number}-{self.version_number}. HTTP {response.status_code}') + except requests.exceptions.RequestException as e: + raise ErrorException(f'Failed to connect to Peeron: {e}') + + # Parse HTML to locate instruction pages + soup = BeautifulSoup(response.text, 'html.parser') + + # Check for "Browse instruction library" header (set not found) + if soup.find('h1', string="Browse instruction library"): + raise ErrorException(f'Set {self.set_number}-{self.version_number} not found on Peeron') + + # Locate all thumbnail images in the expected table structure + thumbnails = soup.select('table[cellspacing="5"] a img[src^="http://belay.peeron.com/thumbs/"]') + + if not thumbnails: + raise ErrorException(f'No instruction pages found for {self.set_number}-{self.version_number} on Peeron') + + pages: list[PeeronPage] = [] + for img in thumbnails: + thumb_url = img['src'] + + # Extract the page number from the thumbnail URL + page_number = thumb_url.split('/')[-2] + + # Build the full-size image URL + image_url = f"{scan_base_url}{page_number}/" + + logger.debug(f"[find_pages] Page {page_number}: thumb={thumb_url}, image={image_url}") + + # Create alt text for the page + alt_text = f"LEGO Instructions {self.set_number}-{self.version_number} Page {page_number}" + + page = PeeronPage( + page_number=page_number, + thumbnail_url=thumb_url, + image_url=image_url, + alt_text=alt_text + ) + pages.append(page) + + # Cache the pages for later use + self.pages = pages + + logger.debug(f"[find_pages] found {len(pages)} pages for {self.set_number}-{self.version_number}") + return pages + + # Find instructions with fallback to Peeron + @staticmethod + def find_instructions_with_peeron_fallback(set: str, /) -> tuple[list[tuple[str, str]], list[PeeronPage] | None]: + """ + Enhanced version of BrickInstructions.find_instructions() that falls back to Peeron. + Returns (rebrickable_instructions, peeron_pages). + If rebrickable_instructions is empty, peeron_pages will contain Peeron data. + """ + from .instructions import BrickInstructions + + # First try Rebrickable + try: + rebrickable_instructions = BrickInstructions.find_instructions(set) + return rebrickable_instructions, None + except ErrorException as e: + logger.info(f"Rebrickable failed for {set}: {e}. Trying Peeron fallback...") + + # Fallback to Peeron + try: + peeron = PeeronInstructions(set) + peeron_pages = peeron.find_pages() + return [], peeron_pages + except ErrorException as peeron_error: + # Both failed, re-raise original Rebrickable error + logger.info(f"Peeron also failed for {set}: {peeron_error}") + raise e from peeron_error \ No newline at end of file diff --git a/bricktracker/peeron_pdf.py b/bricktracker/peeron_pdf.py new file mode 100644 index 0000000..75d5d08 --- /dev/null +++ b/bricktracker/peeron_pdf.py @@ -0,0 +1,269 @@ +import logging +import os +import tempfile +import time +from typing import Any, TYPE_CHECKING + +import cloudscraper +from flask import current_app +from PIL import Image + +from .exceptions import DownloadException, ErrorException +from .instructions import BrickInstructions +from .peeron_instructions import PeeronPage, get_min_image_size, get_peeron_download_delay, get_peeron_instruction_url, create_peeron_scraper +if TYPE_CHECKING: + from .socket import BrickSocket + +logger = logging.getLogger(__name__) + + +# PDF generator for Peeron instruction pages +class PeeronPDF(object): + socket: 'BrickSocket' + set_number: str + version_number: str + pages: list[PeeronPage] + filename: str + + def __init__( + self, + set_number: str, + version_number: str, + pages: list[PeeronPage], + /, + *, + socket: 'BrickSocket', + ): + # Save the socket + self.socket = socket + + # Save set information + self.set_number = set_number + self.version_number = version_number + self.pages = pages + + # Generate filename following BrickTracker conventions + self.filename = f"{set_number}-{version_number}_peeron.pdf" + + # Download pages and create PDF + def create_pdf(self, /) -> None: + """ + Downloads selected Peeron pages and merges them into a PDF. + Uses progress updates via socket similar to BrickInstructions.download() + """ + try: + target_path = self._get_target_path() + + # Skip if we already have it + if os.path.isfile(target_path): + return self.socket.complete( + message=f"File {self.filename} already exists, skipped" + ) + + # Set up progress tracking + total_pages = len(self.pages) + self.socket.update_total(total_pages) + self.socket.progress_count = 0 + self.socket.progress(message=f"Starting download of {total_pages} pages") + + # Set up cloudscraper session for all downloads + scraper = create_peeron_scraper() + + # First visit the main instruction page to establish session with Peeron + try: + main_page_url = get_peeron_instruction_url(self.set_number, self.version_number) + logger.debug(f"Establishing session by visiting: {main_page_url}") + main_response = scraper.get(main_page_url) + logger.debug(f"Main page visit: HTTP {main_response.status_code}") + except Exception as e: + logger.warning(f"Failed to visit main page: {e}") + + # Download images to temporary files + temp_files = [] + failed_pages = [] + + try: + for i, page in enumerate(self.pages): + # Add delay between requests to avoid being blocked + if i > 0: + delay_ms = get_peeron_download_delay() + time.sleep(delay_ms / 1000.0) # Convert milliseconds to seconds + + temp_file = self._download_page_image(page, i + 1, scraper) + if temp_file: + temp_files.append(temp_file) + else: + failed_pages.append(page.page_number) + + if not temp_files: + # Collect detailed error information + error_msg = f"Failed to download any instruction pages for set {self.set_number}-{self.version_number}." + + # Check if it's a bot protection issue by trying to access the main page + try: + test_response = scraper.get(get_peeron_instruction_url(self.set_number, self.version_number)) + if test_response.status_code == 403: + error_msg += " Peeron blocked the request (HTTP 403) - bot protection is active." + elif test_response.status_code == 404: + error_msg += " Set not found on Peeron (HTTP 404)." + elif "Browse instruction library" in test_response.text: + error_msg += " Set exists on Peeron but has no instruction scans available." + else: + min_size = get_min_image_size() + error_msg += f" All pages returned small error images (smaller than {min_size}x{min_size}) - likely bot protection." + except Exception: + error_msg += " Could not connect to Peeron - check internet connection." + + raise DownloadException(error_msg) + + elif len(temp_files) < total_pages: + # Partial success + error_msg = f"Only downloaded {len(temp_files)}/{total_pages} pages successfully." + if failed_pages: + error_msg += f" Failed pages: {', '.join(failed_pages)}." + logger.warning(error_msg) + + # Create PDF from downloaded images + self._create_pdf_from_images(temp_files, target_path) + + # Success + logger.info(f"Created PDF {self.filename} with {len(temp_files)} pages") + self.socket.complete( + message=f"PDF {self.filename} created with {len(temp_files)} pages" + ) + + finally: + # Cleanup temporary files + for temp_file in temp_files: + try: + os.remove(temp_file) + except Exception as e: + logger.warning(f"Failed to remove temp file {temp_file}: {e}") + + except Exception as e: + logger.error(f"Error creating PDF {self.filename}: {e}") + self.socket.fail( + message=f"Error creating PDF {self.filename}: {e}" + ) + + # Download a single page image + def _download_page_image(self, page: PeeronPage, page_num: int, scraper, /) -> str | None: + """Download a single page image to a temporary file using provided scraper session""" + try: + logger.debug(f"Attempting to download page {page.page_number} from: {page.image_url}") + + # Download the image using the shared scraper session + response = scraper.get(page.image_url, stream=True) + logger.debug(f"Page {page.page_number}: HTTP {response.status_code}, Content-Type: {response.headers.get('content-type', 'unknown')}") + + if not response.ok: + logger.warning(f"Failed to download page {page.page_number}: HTTP {response.status_code}") + return None + + # Check if response is actually an image (not an error page) + content_type = response.headers.get('content-type', '') + if not content_type.startswith('image/'): + # Log first 500 chars of response for debugging + try: + response_text = response.text[:500] + logger.warning(f"Page {page.page_number}: Response is not an image (content-type: {content_type}). Response preview: {response_text}") + except: + logger.warning(f"Page {page.page_number}: Response is not an image (content-type: {content_type})") + return None + + # Create temporary file + temp_fd, temp_path = tempfile.mkstemp(suffix='.jpg', prefix=f'peeron_{page.page_number}_') + + try: + with os.fdopen(temp_fd, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + + # Validate that we actually got an image (not an HTML error page) + try: + with Image.open(temp_path) as test_img: + width, height = test_img.size + min_size = get_min_image_size() + if width < min_size or height < min_size: # Too small to be a real instruction page + logger.warning(f"Page {page.page_number}: Image too small ({width}x{height}) - likely an error page") + os.remove(temp_path) + return None + except Exception as img_error: + logger.warning(f"Page {page.page_number}: Invalid image file - {img_error}") + os.remove(temp_path) + return None + + # Update progress + self.socket.progress_count += 1 + self.socket.progress( + message=f"Downloaded page {page.page_number} ({page_num}/{len(self.pages)})" + ) + + return temp_path + + except Exception as e: + # Clean up file descriptor if something went wrong + try: + os.close(temp_fd) + except: + pass + try: + os.remove(temp_path) + except: + pass + raise e + + except Exception as e: + logger.warning(f"Failed to download page {page.page_number}: {e}") + return None + + # Create PDF from downloaded images + def _create_pdf_from_images(self, image_paths: list[str], output_path: str, /) -> None: + """Create a PDF from a list of image files""" + try: + # Import FPDF (should be available from requirements) + from fpdf import FPDF + except ImportError: + raise ErrorException("FPDF library not available. Install with: pip install fpdf2") + + pdf = FPDF() + + for i, img_path in enumerate(image_paths): + try: + # Open image to get dimensions + with Image.open(img_path) as image: + width, height = image.size + + # Add page with image dimensions (convert pixels to mm) + # 1 pixel = 0.264583 mm (assuming 96 DPI) + page_width = width * 0.264583 + page_height = height * 0.264583 + + pdf.add_page(format=(page_width, page_height)) + pdf.image(img_path, x=0, y=0, w=page_width, h=page_height) + + # Update progress + progress_msg = f"Processing page {i + 1}/{len(image_paths)} into PDF" + self.socket.progress(message=progress_msg) + + except Exception as e: + logger.warning(f"Failed to add image {img_path} to PDF: {e}") + continue + + # Save the PDF + pdf.output(output_path) + + # Get target file path + def _get_target_path(self, /) -> str: + """Get the full path where the PDF should be saved""" + instructions_folder = os.path.join( + current_app.static_folder, # type: ignore + current_app.config['INSTRUCTIONS_FOLDER'] + ) + return os.path.join(instructions_folder, self.filename) + + # Create BrickInstructions instance for the generated PDF + def get_instructions(self, /) -> BrickInstructions: + """Return a BrickInstructions instance for the generated PDF""" + return BrickInstructions(self.filename) \ No newline at end of file diff --git a/bricktracker/views/instructions.py b/bricktracker/views/instructions.py index 2c2138a..dc44e34 100644 --- a/bricktracker/views/instructions.py +++ b/bricktracker/views/instructions.py @@ -14,6 +14,7 @@ from .exceptions import exception_handler from ..instructions import BrickInstructions from ..instructions_list import BrickInstructionsList from ..parser import parse_set +from ..peeron_instructions import PeeronInstructions from ..socket import MESSAGES from .upload import upload_helper @@ -160,12 +161,41 @@ def do_download() -> str: except Exception: set = '' - return render_template( - 'instructions.html', - download=True, - instructions=BrickInstructions.find_instructions(set), - set=set, - path=current_app.config['SOCKET_PATH'], - namespace=current_app.config['SOCKET_NAMESPACE'], - messages=MESSAGES - ) + # Try Rebrickable first, fallback to Peeron if it fails + rebrickable_instructions, peeron_pages = PeeronInstructions.find_instructions_with_peeron_fallback(set) + + # Determine which template to render based on what we found + if rebrickable_instructions: + # Standard Rebrickable instructions found + return render_template( + 'instructions.html', + download=True, + instructions=rebrickable_instructions, + set=set, + path=current_app.config['SOCKET_PATH'], + namespace=current_app.config['SOCKET_NAMESPACE'], + messages=MESSAGES + ) + elif peeron_pages: + # Peeron pages found - show page selection interface + return render_template( + 'peeron_select.html', + download=True, + pages=peeron_pages, + set=set, + path=current_app.config['SOCKET_PATH'], + namespace=current_app.config['SOCKET_NAMESPACE'], + messages=MESSAGES + ) + else: + # This shouldn't happen as the fallback method re-raises the original error + return render_template( + 'instructions.html', + download=True, + instructions=[], + set=set, + error='No instructions found on Rebrickable or Peeron', + path=current_app.config['SOCKET_PATH'], + namespace=current_app.config['SOCKET_NAMESPACE'], + messages=MESSAGES + ) diff --git a/templates/instructions/peeron_socket.html b/templates/instructions/peeron_socket.html new file mode 100644 index 0000000..6fecd0c --- /dev/null +++ b/templates/instructions/peeron_socket.html @@ -0,0 +1,148 @@ + \ No newline at end of file diff --git a/templates/peeron_select.html b/templates/peeron_select.html new file mode 100644 index 0000000..9ee8657 --- /dev/null +++ b/templates/peeron_select.html @@ -0,0 +1,88 @@ +{% extends 'base.html' %} + +{% block title %} - Download instructions from Peeron{% endblock %} + +{% block main %} +
+ {% if error %}{% endif %} +
+
+
+
+
+
Download instructions from Rebrickable
+
+
+
+ + +
+
+ +
+
+ {% if pages %} + +
+
+
Select instructions to download
+
+
+
+ +
+
Available Instructions
+
+ {% for page in pages %} +
+
+
+
+ + +
+
+
+
+ {% endfor %} +
+
+
+
+

+ Progress + + + Loading... + +

+
+
+
+

+
+
+ +
+ {% include 'instructions/peeron_socket.html' %} + {% endif %} +
+
+
+{% endblock %} \ No newline at end of file From 787624c432575ab0c972eb1050672416d6d72943 Mon Sep 17 00:00:00 2001 From: Frederik Baerentsen Date: Tue, 23 Sep 2025 17:46:46 +0200 Subject: [PATCH 2/8] Added env variables and fixed socket for peeron --- .env.sample | 33 +++++++++++++++++++++++--- bricktracker/socket.py | 53 ++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 2 ++ 3 files changed, 85 insertions(+), 3 deletions(-) diff --git a/.env.sample b/.env.sample index ae1fc46..ffa6b49 100644 --- a/.env.sample +++ b/.env.sample @@ -262,9 +262,36 @@ # Default: https://rebrickable.com/instructions/{path} # BK_REBRICKABLE_LINK_INSTRUCTIONS_PATTERN= -# Optional: User-Agent to use when querying Rebrickable outside of the Rebrick python library -# Default: 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' -# BK_REBRICKABLE_USER_AGENT= +# Optional: User-Agent to use when querying Rebrickable and Peeron outside of the Rebrick python library +# Default: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 +# BK_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 + +# Legacy: User-Agent for Rebrickable (use BK_USER_AGENT instead) +# BK_REBRICKABLE_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 + +# Optional: Delay in milliseconds between Peeron page downloads to avoid being potentially blocked +# Default: 1000 +# BK_PEERON_DOWNLOAD_DELAY=1000 + +# Optional: Minimum image size (width/height) for valid Peeron instruction pages +# Images smaller than this are considered error placeholders and will be rejected +# Default: 100 +# BK_PEERON_MIN_IMAGE_SIZE=100 + +# Optional: Pattern for Peeron instruction page URLs. Will be passed to Python .format() +# Supports {set_number} and {version_number} parameters +# Default: http://peeron.com/scans/{set_number}-{version_number} +# BK_PEERON_INSTRUCTION_PATTERN= + +# Optional: Pattern for Peeron thumbnail URLs. Will be passed to Python .format() +# Supports {set_number} and {version_number} parameters +# Default: http://belay.peeron.com/thumbs/{set_number}-{version_number}/ +# BK_PEERON_THUMBNAIL_PATTERN= + +# Optional: Pattern for Peeron scan URLs. Will be passed to Python .format() +# Supports {set_number} and {version_number} parameters +# Default: http://belay.peeron.com/scans/{set_number}-{version_number}/ +# BK_PEERON_SCAN_PATTERN= # Optional: Display Rebrickable links wherever applicable # Default: false diff --git a/bricktracker/socket.py b/bricktracker/socket.py index 10c3358..fd64389 100644 --- a/bricktracker/socket.py +++ b/bricktracker/socket.py @@ -6,6 +6,8 @@ from flask_socketio import SocketIO from .instructions import BrickInstructions from .instructions_list import BrickInstructionsList +from .peeron_instructions import PeeronPage +from .peeron_pdf import PeeronPDF from .set import BrickSet from .socket_decorator import authenticated_socket, rebrickable_socket from .sql import close as sql_close @@ -18,6 +20,7 @@ MESSAGES: Final[dict[str, str]] = { 'CONNECT': 'connect', 'DISCONNECT': 'disconnect', 'DOWNLOAD_INSTRUCTIONS': 'download_instructions', + 'DOWNLOAD_PEERON_PAGES': 'download_peeron_pages', 'FAIL': 'fail', 'IMPORT_SET': 'import_set', 'LOAD_SET': 'load_set', @@ -105,6 +108,56 @@ class BrickSocket(object): instructions.download(path) BrickInstructionsList(force=True) + + @self.socket.on(MESSAGES['DOWNLOAD_PEERON_PAGES'], namespace=self.namespace) # noqa: E501 + @authenticated_socket(self) + def download_peeron_pages(data: dict[str, Any], /) -> None: + logger.debug('Socket: DOWNLOAD_PEERON_PAGES={data} (from: {fr})'.format( + data=data, + fr=request.sid, # type: ignore + )) + + try: + # Extract data from the request + set_number = data.get('set', '') + pages_data = data.get('pages', []) + + if not set_number: + raise ValueError("Set number is required") + + if not pages_data: + raise ValueError("No pages selected") + + # Parse set number + if '-' in set_number: + parts = set_number.split('-', 1) + set_num = parts[0] + version_num = parts[1] if len(parts) > 1 else '1' + else: + set_num = set_number + version_num = '1' + + # Convert page data to PeeronPage objects + pages = [] + for page_data in pages_data: + page = PeeronPage( + page_number=page_data.get('page_number', ''), + thumbnail_url=page_data.get('thumbnail_url', ''), + image_url=page_data.get('image_url', ''), + alt_text=page_data.get('alt_text', '') + ) + pages.append(page) + + # Create PDF generator and start download + pdf_generator = PeeronPDF(set_num, version_num, pages, socket=self) + pdf_generator.create_pdf() + + # Refresh instructions list to include new PDF + BrickInstructionsList(force=True) + + except Exception as e: + logger.error(f"Error in download_peeron_pages: {e}") + self.fail(message=f"Error downloading Peeron pages: {e}") @self.socket.on(MESSAGES['IMPORT_SET'], namespace=self.namespace) @rebrickable_socket(self) diff --git a/requirements.txt b/requirements.txt index 7cd3644..f0e433d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,5 @@ requests tzdata bs4 cloudscraper +fpdf2 +pillow \ No newline at end of file From 74fe14f09bbd25b892624acbbf54de9c2547f19d Mon Sep 17 00:00:00 2001 From: Frederik Baerentsen Date: Thu, 25 Sep 2025 20:47:41 +0200 Subject: [PATCH 3/8] Added rotation, moved select all, added link after download --- bricktracker/app.py | 3 +- bricktracker/instructions.py | 6 +- bricktracker/peeron_instructions.py | 1 + bricktracker/peeron_pdf.py | 76 ++++++++++++++++------- bricktracker/socket.py | 3 +- templates/instructions/peeron_socket.html | 64 ++++++++++++++----- templates/peeron_select.html | 65 ++++++++++++++++++- 7 files changed, 174 insertions(+), 44 deletions(-) diff --git a/bricktracker/app.py b/bricktracker/app.py index d993d99..5f57bd8 100644 --- a/bricktracker/app.py +++ b/bricktracker/app.py @@ -60,7 +60,8 @@ def setup_app(app: Flask) -> None: # Setup the login manager LoginManager(app) - # I don't know :-) + # Configure proxy header handling for reverse proxy deployments (nginx, Apache, etc.) + # This ensures proper client IP detection and HTTPS scheme recognition app.wsgi_app = ProxyFix( app.wsgi_app, x_for=1, diff --git a/bricktracker/instructions.py b/bricktracker/instructions.py index e0c4521..0b640ce 100644 --- a/bricktracker/instructions.py +++ b/bricktracker/instructions.py @@ -101,8 +101,9 @@ class BrickInstructions(object): # Skip if we already have it if os.path.isfile(target): + pdf_url = self.url() return self.socket.complete( - message=f"File {self.filename} already exists, skipped" + message=f'File {self.filename} already exists, skipped - Open PDF' ) # Fetch PDF via cloudscraper (to bypass Cloudflare) @@ -141,8 +142,9 @@ class BrickInstructions(object): # Done! logger.info(f"Downloaded {self.filename}") + pdf_url = self.url() self.socket.complete( - message=f"File {self.filename} downloaded ({self.human_size()})" + message=f'File {self.filename} downloaded ({self.human_size()}) - Open PDF' ) except Exception as e: diff --git a/bricktracker/peeron_instructions.py b/bricktracker/peeron_instructions.py index 2a3e302..e4c2e35 100644 --- a/bricktracker/peeron_instructions.py +++ b/bricktracker/peeron_instructions.py @@ -63,6 +63,7 @@ class PeeronPage(NamedTuple): thumbnail_url: str image_url: str alt_text: str + rotation: int = 0 # Rotation in degrees (0, 90, 180, 270) # Peeron instruction scraper diff --git a/bricktracker/peeron_pdf.py b/bricktracker/peeron_pdf.py index 75d5d08..19e249b 100644 --- a/bricktracker/peeron_pdf.py +++ b/bricktracker/peeron_pdf.py @@ -56,8 +56,11 @@ class PeeronPDF(object): # Skip if we already have it if os.path.isfile(target_path): + # Create BrickInstructions instance to get PDF URL + instructions = BrickInstructions(self.filename) + pdf_url = instructions.url() return self.socket.complete( - message=f"File {self.filename} already exists, skipped" + message=f'File {self.filename} already exists, skipped - Open PDF' ) # Set up progress tracking @@ -78,8 +81,8 @@ class PeeronPDF(object): except Exception as e: logger.warning(f"Failed to visit main page: {e}") - # Download images to temporary files - temp_files = [] + # Download images to temporary files with rotation info + temp_files_with_rotation = [] failed_pages = [] try: @@ -91,11 +94,11 @@ class PeeronPDF(object): temp_file = self._download_page_image(page, i + 1, scraper) if temp_file: - temp_files.append(temp_file) + temp_files_with_rotation.append((temp_file, page.rotation)) else: failed_pages.append(page.page_number) - if not temp_files: + if not temp_files_with_rotation: # Collect detailed error information error_msg = f"Failed to download any instruction pages for set {self.set_number}-{self.version_number}." @@ -116,25 +119,30 @@ class PeeronPDF(object): raise DownloadException(error_msg) - elif len(temp_files) < total_pages: + elif len(temp_files_with_rotation) < total_pages: # Partial success - error_msg = f"Only downloaded {len(temp_files)}/{total_pages} pages successfully." + error_msg = f"Only downloaded {len(temp_files_with_rotation)}/{total_pages} pages successfully." if failed_pages: error_msg += f" Failed pages: {', '.join(failed_pages)}." logger.warning(error_msg) - # Create PDF from downloaded images - self._create_pdf_from_images(temp_files, target_path) + # Create PDF from downloaded images with rotation + self._create_pdf_from_images(temp_files_with_rotation, target_path) # Success - logger.info(f"Created PDF {self.filename} with {len(temp_files)} pages") + logger.info(f"Created PDF {self.filename} with {len(temp_files_with_rotation)} pages") + + # Create BrickInstructions instance to get PDF URL + instructions = BrickInstructions(self.filename) + pdf_url = instructions.url() + self.socket.complete( - message=f"PDF {self.filename} created with {len(temp_files)} pages" + message=f'PDF {self.filename} created with {len(temp_files_with_rotation)} pages - Open PDF' ) finally: # Cleanup temporary files - for temp_file in temp_files: + for temp_file, _ in temp_files_with_rotation: try: os.remove(temp_file) except Exception as e: @@ -219,8 +227,8 @@ class PeeronPDF(object): return None # Create PDF from downloaded images - def _create_pdf_from_images(self, image_paths: list[str], output_path: str, /) -> None: - """Create a PDF from a list of image files""" + def _create_pdf_from_images(self, image_paths_and_rotations: list[tuple[str, int]], output_path: str, /) -> None: + """Create a PDF from a list of image files with their rotations""" try: # Import FPDF (should be available from requirements) from fpdf import FPDF @@ -229,22 +237,44 @@ class PeeronPDF(object): pdf = FPDF() - for i, img_path in enumerate(image_paths): + for i, (img_path, rotation) in enumerate(image_paths_and_rotations): try: - # Open image to get dimensions + # Open image and apply rotation if needed with Image.open(img_path) as image: + # Apply rotation if specified + if rotation != 0: + # PIL rotation is counter-clockwise, so we negate for clockwise rotation + image = image.rotate(-rotation, expand=True) + width, height = image.size - # Add page with image dimensions (convert pixels to mm) - # 1 pixel = 0.264583 mm (assuming 96 DPI) - page_width = width * 0.264583 - page_height = height * 0.264583 + # Add page with image dimensions (convert pixels to mm) + # 1 pixel = 0.264583 mm (assuming 96 DPI) + page_width = width * 0.264583 + page_height = height * 0.264583 - pdf.add_page(format=(page_width, page_height)) - pdf.image(img_path, x=0, y=0, w=page_width, h=page_height) + pdf.add_page(format=(page_width, page_height)) + + # Save rotated image to temporary file for FPDF + temp_rotated_path = None + if rotation != 0: + import tempfile + temp_fd, temp_rotated_path = tempfile.mkstemp(suffix='.jpg', prefix=f'peeron_rotated_{i}_') + try: + os.close(temp_fd) # Close file descriptor, we'll use the path + image.save(temp_rotated_path, 'JPEG', quality=95) + pdf.image(temp_rotated_path, x=0, y=0, w=page_width, h=page_height) + finally: + # Clean up rotated temp file + if temp_rotated_path and os.path.exists(temp_rotated_path): + os.remove(temp_rotated_path) + else: + pdf.image(img_path, x=0, y=0, w=page_width, h=page_height) # Update progress - progress_msg = f"Processing page {i + 1}/{len(image_paths)} into PDF" + progress_msg = f"Processing page {i + 1}/{len(image_paths_and_rotations)} into PDF" + if rotation != 0: + progress_msg += f" (rotated {rotation}°)" self.socket.progress(message=progress_msg) except Exception as e: diff --git a/bricktracker/socket.py b/bricktracker/socket.py index fd64389..873f066 100644 --- a/bricktracker/socket.py +++ b/bricktracker/socket.py @@ -144,7 +144,8 @@ class BrickSocket(object): page_number=page_data.get('page_number', ''), thumbnail_url=page_data.get('thumbnail_url', ''), image_url=page_data.get('image_url', ''), - alt_text=page_data.get('alt_text', '') + alt_text=page_data.get('alt_text', ''), + rotation=page_data.get('rotation', 0) ) pages.append(page) diff --git a/templates/instructions/peeron_socket.html b/templates/instructions/peeron_socket.html index 6fecd0c..136ca38 100644 --- a/templates/instructions/peeron_socket.html +++ b/templates/instructions/peeron_socket.html @@ -22,32 +22,67 @@ window.addEventListener('load', () => { }); } - // Add select all button - this.add_select_all_button(); + // Setup select all button (now in template) + this.setup_select_all_button(); + + // Setup rotation buttons + this.setup_rotation_buttons(); // Setup the socket this.setup(); } - add_select_all_button() { - if (this.html_button) { - const selectAllButton = document.createElement('button'); - selectAllButton.type = 'button'; - selectAllButton.className = 'btn btn-sm btn-outline-secondary me-2'; - selectAllButton.innerHTML = ' Select All'; + setup_select_all_button() { + const selectAllButton = document.getElementById('peeron-select-all'); + if (selectAllButton) { selectAllButton.addEventListener('click', () => { const checkboxes = this.get_files(); const allChecked = checkboxes.every(cb => cb.checked); checkboxes.forEach(cb => cb.checked = !allChecked); - selectAllButton.innerHTML = allChecked ? - ' Select All' : - ' Deselect All'; - }); - this.html_button.parentNode.insertBefore(selectAllButton, this.html_button); + // Update button text and icon + if (allChecked) { + selectAllButton.innerHTML = ' Select All'; + } else { + selectAllButton.innerHTML = ' Deselect All'; + } + }); } } + setup_rotation_buttons() { + document.querySelectorAll('.peeron-rotate-btn').forEach(button => { + button.addEventListener('click', (e) => { + e.preventDefault(); // Prevent label click + e.stopPropagation(); // Stop event bubbling + + const targetId = button.getAttribute('data-target'); + const checkboxId = button.getAttribute('data-checkbox'); + const targetImg = document.getElementById(targetId); + const checkbox = document.getElementById(checkboxId); + + if (targetImg && checkbox) { + let currentRotation = parseInt(button.getAttribute('data-rotation') || '0'); + currentRotation = (currentRotation + 90) % 360; + + // Update image rotation + targetImg.style.transform = `rotate(${currentRotation}deg)`; + button.setAttribute('data-rotation', currentRotation.toString()); + + // Store rotation in checkbox data for later use + checkbox.setAttribute('data-rotation', currentRotation.toString()); + + // Update the rotation icon to indicate current state + const icon = button.querySelector('i'); + if (icon) { + // Rotate the icon to match the image rotation + icon.style.transform = `rotate(${currentRotation}deg)`; + } + } + }); + }); + } + complete(data) { super.complete(data); @@ -100,7 +135,8 @@ window.addEventListener('load', () => { page_number: checkbox.getAttribute('data-page-number'), thumbnail_url: checkbox.getAttribute('data-thumbnail-url'), image_url: checkbox.getAttribute('data-image-url'), - alt_text: checkbox.getAttribute('data-alt-text') + alt_text: checkbox.getAttribute('data-alt-text'), + rotation: parseInt(checkbox.getAttribute('data-rotation') || '0') })); this.clear(); diff --git a/templates/peeron_select.html b/templates/peeron_select.html index 9ee8657..6245188 100644 --- a/templates/peeron_select.html +++ b/templates/peeron_select.html @@ -35,7 +35,12 @@
-
Available Instructions
+
+
Available Instructions
+ +
{% for page in pages %}
@@ -47,11 +52,22 @@ data-thumbnail-url="{{ page.thumbnail_url }}" data-image-url="{{ page.image_url }}" data-alt-text="{{ page.alt_text }}" + data-rotation="0" autocomplete="off">
+ + {% endblock %} \ No newline at end of file From 0a2954393974f0eb05a5cb136efd98624832b8a0 Mon Sep 17 00:00:00 2001 From: Frederik Baerentsen Date: Thu, 25 Sep 2025 21:42:15 +0200 Subject: [PATCH 4/8] Cleanup of peeron download --- static/scripts/socket/peeron.js | 170 +++++++++++++++++++++ templates/base.html | 6 + templates/instructions/peeron_socket.html | 178 +--------------------- templates/peeron_select.html | 46 +----- 4 files changed, 179 insertions(+), 221 deletions(-) create mode 100644 static/scripts/socket/peeron.js diff --git a/static/scripts/socket/peeron.js b/static/scripts/socket/peeron.js new file mode 100644 index 0000000..36345cd --- /dev/null +++ b/static/scripts/socket/peeron.js @@ -0,0 +1,170 @@ +// Peeron Socket class +class BrickPeeronSocket extends BrickSocket { + constructor(id, path, namespace, messages) { + super(id, path, namespace, messages, true); + + // Form elements (built based on the initial id) + this.html_button = document.getElementById(id); + this.html_files = document.getElementById(`${id}-files`); + + if (this.html_button) { + this.html_button.addEventListener("click", (e) => { + this.execute(); + }); + } + + // Setup select all button + this.setup_select_all_button(); + + // Setup rotation buttons + this.setup_rotation_buttons(); + + // Setup the socket + this.setup(); + } + + setup_select_all_button() { + const selectAllButton = document.getElementById('peeron-select-all'); + if (selectAllButton) { + selectAllButton.addEventListener('click', () => { + const checkboxes = this.get_files(); + const allChecked = checkboxes.every(cb => cb.checked); + checkboxes.forEach(cb => cb.checked = !allChecked); + + // Update button text and icon + if (allChecked) { + selectAllButton.innerHTML = ' Select All'; + } else { + selectAllButton.innerHTML = ' Deselect All'; + } + }); + } + } + + setup_rotation_buttons() { + document.querySelectorAll('.peeron-rotate-btn').forEach(button => { + button.addEventListener('click', (e) => { + e.preventDefault(); // Prevent label click + e.stopPropagation(); // Stop event bubbling + + const targetId = button.getAttribute('data-target'); + const checkboxId = button.getAttribute('data-checkbox'); + const targetImg = document.getElementById(targetId); + const checkbox = document.getElementById(checkboxId); + + if (targetImg && checkbox) { + let currentRotation = parseInt(button.getAttribute('data-rotation') || '0'); + currentRotation = (currentRotation + 90) % 360; + + // Update image rotation + targetImg.style.transform = `rotate(${currentRotation}deg)`; + button.setAttribute('data-rotation', currentRotation.toString()); + + // Store rotation in checkbox data for later use + checkbox.setAttribute('data-rotation', currentRotation.toString()); + + // Update the rotation icon to indicate current state + const icon = button.querySelector('i'); + if (icon) { + // Rotate the icon to match the image rotation + icon.style.transform = `rotate(${currentRotation}deg)`; + } + } + }); + }); + } + + // Upon receiving a complete message + complete(data) { + super.complete(data); + + // Clear progress display after completion + if (this.html_progress_message) { + this.html_progress_message.classList.add("d-none"); + this.html_progress_message.textContent = ""; + } + + if (this.html_count) { + this.html_count.classList.add("d-none"); + this.html_count.textContent = ""; + } + + // Ensure spinner is hidden + this.spinner(false); + + this.toggle(true); + } + + // Execute the action + execute() { + if (!this.disabled && this.socket !== undefined && this.socket.connected) { + this.toggle(false); + + this.download_peeron_pages(); + } + } + + // Get the list of checkboxes describing files + get_files(checked=false) { + let files = []; + + if (this.html_files) { + files = [...this.html_files.querySelectorAll('input[type="checkbox"]')]; + + if (checked) { + files = files.filter(file => file.checked); + } + } + + return files; + } + + // Download Peeron pages + download_peeron_pages() { + if (this.html_files) { + const selectedFiles = this.get_files(true); + + if (selectedFiles.length === 0) { + this.fail({message: "Please select at least one page to download."}); + this.toggle(true); + return; + } + + const pages = selectedFiles.map(checkbox => ({ + page_number: checkbox.getAttribute('data-page-number'), + thumbnail_url: checkbox.getAttribute('data-thumbnail-url'), + image_url: checkbox.getAttribute('data-image-url'), + alt_text: checkbox.getAttribute('data-alt-text'), + rotation: parseInt(checkbox.getAttribute('data-rotation') || '0') + })); + + this.clear(); + this.spinner(true); + + const setElement = document.querySelector('input[name="download-set"]'); + const set = setElement ? setElement.value : ''; + + this.socket.emit(this.messages.DOWNLOAD_PEERON_PAGES, { + set: set, + pages: pages, + total: pages.length, + current: 0 + }); + } else { + this.fail({message: "Could not find the list of pages to download"}); + } + } + + // Toggle clicking on the button, or sending events + toggle(enabled) { + super.toggle(enabled); + + if (this.html_files) { + this.get_files().forEach(el => el.disabled = !enabled); + } + + if (this.html_button) { + this.html_button.disabled = !enabled; + } + } +} \ No newline at end of file diff --git a/templates/base.html b/templates/base.html index 286ca5d..d52beb8 100644 --- a/templates/base.html +++ b/templates/base.html @@ -104,6 +104,12 @@ {% endif %} {% if request.endpoint == 'set.list' %} +{% endif %} + {% if request.endpoint == 'set.details' %} + + {% endif %} + {% if request.endpoint == 'instructions.download' or request.endpoint == 'instructions.do_download' %} + {% endif %} \ No newline at end of file diff --git a/templates/peeron_select.html b/templates/peeron_select.html index 6245188..6a61932 100644 --- a/templates/peeron_select.html +++ b/templates/peeron_select.html @@ -58,8 +58,7 @@
{{ page.alt_text }} + class="img-fluid mb-2 border rounded" style="max-height: 150px; transform: rotate(0deg); transition: transform 0.3s ease;">
Page {{ page.page_number }}
-
Loading dimensions...
@@ -102,46 +100,4 @@ - {% endblock %} \ No newline at end of file From ec4f44a3ab332db797cb41ffa445ed03603246c0 Mon Sep 17 00:00:00 2001 From: Frederik Baerentsen Date: Thu, 25 Sep 2025 21:46:58 +0200 Subject: [PATCH 5/8] Removed unused import --- templates/base.html | 3 --- 1 file changed, 3 deletions(-) diff --git a/templates/base.html b/templates/base.html index d52beb8..c548e8f 100644 --- a/templates/base.html +++ b/templates/base.html @@ -104,9 +104,6 @@ {% endif %} {% if request.endpoint == 'set.list' %} -{% endif %} - {% if request.endpoint == 'set.details' %} - {% endif %} {% if request.endpoint == 'instructions.download' or request.endpoint == 'instructions.do_download' %} From 4bc0ef5cc4e3c65855b5ffb98b7a38ab622b388e Mon Sep 17 00:00:00 2001 From: Frederik Baerentsen Date: Thu, 25 Sep 2025 22:09:36 +0200 Subject: [PATCH 6/8] Peeron thumbnails cache, as peeron uses http and cant live link to https --- bricktracker/peeron_instructions.py | 105 +++++++++++++++++++++++++++- templates/peeron_select.html | 2 +- 2 files changed, 105 insertions(+), 2 deletions(-) diff --git a/bricktracker/peeron_instructions.py b/bricktracker/peeron_instructions.py index e4c2e35..9aec4c2 100644 --- a/bricktracker/peeron_instructions.py +++ b/bricktracker/peeron_instructions.py @@ -1,10 +1,14 @@ +import hashlib import logging +import os +from pathlib import Path +import time from typing import Any, NamedTuple, TYPE_CHECKING from urllib.parse import urljoin from bs4 import BeautifulSoup import cloudscraper -from flask import current_app +from flask import current_app, url_for import requests from .exceptions import ErrorException @@ -57,10 +61,105 @@ def create_peeron_scraper(): return scraper +def get_thumbnail_cache_dir(): + """Get the directory for thumbnail caching""" + static_dir = Path(current_app.static_folder) + cache_dir = static_dir / 'images' / 'peeron_cache' + cache_dir.mkdir(parents=True, exist_ok=True) + return cache_dir + + +def get_cached_thumbnail_filename(thumbnail_url: str) -> str: + """Generate a filename for caching thumbnails based on URL""" + # Create hash of the URL to avoid filename issues + url_hash = hashlib.md5(thumbnail_url.encode()).hexdigest() + # Extract file extension from URL, default to .jpg + ext = '.jpg' + if '.' in thumbnail_url: + url_ext = '.' + thumbnail_url.split('.')[-1].lower() + if url_ext in ['.jpg', '.jpeg', '.png', '.gif']: + ext = url_ext + return f"{url_hash}{ext}" + + +def cache_thumbnail(thumbnail_url: str) -> str | None: + """ + Download and cache a thumbnail image, return the local URL path. + Returns None if caching fails. + """ + try: + cache_dir = get_thumbnail_cache_dir() + filename = get_cached_thumbnail_filename(thumbnail_url) + cache_path = cache_dir / filename + + # Return existing cached file if it exists + if cache_path.exists(): + return url_for('static', filename=f'images/peeron_cache/{filename}') + + # Download the thumbnail + scraper = create_peeron_scraper() + response = scraper.get(thumbnail_url, timeout=10) + + if response.status_code == 200 and len(response.content) > 0: + # Validate it's actually an image by checking minimum size + min_size = get_min_image_size() + if len(response.content) < min_size: + logger.warning(f"Thumbnail too small, skipping cache: {thumbnail_url}") + return None + + # Write to cache + with open(cache_path, 'wb') as f: + f.write(response.content) + + logger.debug(f"Cached thumbnail: {thumbnail_url} -> {cache_path}") + return url_for('static', filename=f'images/peeron_cache/{filename}') + else: + logger.warning(f"Failed to download thumbnail: {thumbnail_url}") + return None + + except Exception as e: + logger.error(f"Error caching thumbnail {thumbnail_url}: {e}") + return None + + +def clear_thumbnail_cache(max_age_days: int = 30) -> int: + """ + Clear old thumbnail cache files. + Returns the number of files deleted. + """ + try: + cache_dir = get_thumbnail_cache_dir() + if not cache_dir.exists(): + return 0 + + deleted_count = 0 + max_age_seconds = max_age_days * 24 * 60 * 60 + current_time = time.time() + + for cache_file in cache_dir.glob('*'): + if cache_file.is_file(): + file_age = current_time - os.path.getmtime(cache_file) + if file_age > max_age_seconds: + try: + cache_file.unlink() + deleted_count += 1 + logger.debug(f"Deleted old cache file: {cache_file}") + except OSError as e: + logger.warning(f"Failed to delete cache file {cache_file}: {e}") + + logger.info(f"Thumbnail cache cleanup completed: {deleted_count} files deleted") + return deleted_count + + except Exception as e: + logger.error(f"Error during cache cleanup: {e}") + return 0 + + class PeeronPage(NamedTuple): """Represents a single instruction page from Peeron""" page_number: str thumbnail_url: str + cached_thumbnail_url: str | None # Local cached thumbnail URL image_url: str alt_text: str rotation: int = 0 # Rotation in degrees (0, 90, 180, 270) @@ -156,9 +255,13 @@ class PeeronInstructions(object): # Create alt text for the page alt_text = f"LEGO Instructions {self.set_number}-{self.version_number} Page {page_number}" + # Cache the thumbnail + cached_thumb_url = cache_thumbnail(thumb_url) + page = PeeronPage( page_number=page_number, thumbnail_url=thumb_url, + cached_thumbnail_url=cached_thumb_url, image_url=image_url, alt_text=alt_text ) diff --git a/templates/peeron_select.html b/templates/peeron_select.html index 6a61932..6e4c45b 100644 --- a/templates/peeron_select.html +++ b/templates/peeron_select.html @@ -57,7 +57,7 @@