diff --git a/.env.sample b/.env.sample index ae1fc46..ffa6b49 100644 --- a/.env.sample +++ b/.env.sample @@ -262,9 +262,36 @@ # Default: https://rebrickable.com/instructions/{path} # BK_REBRICKABLE_LINK_INSTRUCTIONS_PATTERN= -# Optional: User-Agent to use when querying Rebrickable outside of the Rebrick python library -# Default: 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' -# BK_REBRICKABLE_USER_AGENT= +# Optional: User-Agent to use when querying Rebrickable and Peeron outside of the Rebrick python library +# Default: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 +# BK_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 + +# Legacy: User-Agent for Rebrickable (use BK_USER_AGENT instead) +# BK_REBRICKABLE_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 + +# Optional: Delay in milliseconds between Peeron page downloads to avoid being potentially blocked +# Default: 1000 +# BK_PEERON_DOWNLOAD_DELAY=1000 + +# Optional: Minimum image size (width/height) for valid Peeron instruction pages +# Images smaller than this are considered error placeholders and will be rejected +# Default: 100 +# BK_PEERON_MIN_IMAGE_SIZE=100 + +# Optional: Pattern for Peeron instruction page URLs. Will be passed to Python .format() +# Supports {set_number} and {version_number} parameters +# Default: http://peeron.com/scans/{set_number}-{version_number} +# BK_PEERON_INSTRUCTION_PATTERN= + +# Optional: Pattern for Peeron thumbnail URLs. Will be passed to Python .format() +# Supports {set_number} and {version_number} parameters +# Default: http://belay.peeron.com/thumbs/{set_number}-{version_number}/ +# BK_PEERON_THUMBNAIL_PATTERN= + +# Optional: Pattern for Peeron scan URLs. 
Will be passed to Python .format() +# Supports {set_number} and {version_number} parameters +# Default: http://belay.peeron.com/scans/{set_number}-{version_number}/ +# BK_PEERON_SCAN_PATTERN= # Optional: Display Rebrickable links wherever applicable # Default: false diff --git a/bricktracker/app.py b/bricktracker/app.py index d993d99..5f57bd8 100644 --- a/bricktracker/app.py +++ b/bricktracker/app.py @@ -60,7 +60,8 @@ def setup_app(app: Flask) -> None: # Setup the login manager LoginManager(app) - # I don't know :-) + # Configure proxy header handling for reverse proxy deployments (nginx, Apache, etc.) + # This ensures proper client IP detection and HTTPS scheme recognition app.wsgi_app = ProxyFix( app.wsgi_app, x_for=1, diff --git a/bricktracker/instructions.py b/bricktracker/instructions.py index e0c4521..0b640ce 100644 --- a/bricktracker/instructions.py +++ b/bricktracker/instructions.py @@ -101,8 +101,9 @@ class BrickInstructions(object): # Skip if we already have it if os.path.isfile(target): + pdf_url = self.url() return self.socket.complete( - message=f"File {self.filename} already exists, skipped" + message=f'File {self.filename} already exists, skipped - Open PDF' ) # Fetch PDF via cloudscraper (to bypass Cloudflare) @@ -141,8 +142,9 @@ class BrickInstructions(object): # Done! 
logger.info(f"Downloaded {self.filename}") + pdf_url = self.url() self.socket.complete( - message=f"File {self.filename} downloaded ({self.human_size()})" + message=f'File {self.filename} downloaded ({self.human_size()}) - Open PDF' ) except Exception as e: diff --git a/bricktracker/peeron_instructions.py b/bricktracker/peeron_instructions.py new file mode 100644 index 0000000..bd433a5 --- /dev/null +++ b/bricktracker/peeron_instructions.py @@ -0,0 +1,437 @@ +import hashlib +import logging +import os +from pathlib import Path +import time +from typing import Any, NamedTuple, TYPE_CHECKING +from urllib.parse import urljoin + +from bs4 import BeautifulSoup +import cloudscraper +from flask import current_app, url_for +import requests + +from .exceptions import ErrorException +if TYPE_CHECKING: + from .socket import BrickSocket + +logger = logging.getLogger(__name__) + + +def get_peeron_user_agent(): + """Get the User-Agent string for Peeron requests from config""" + return current_app.config.get('REBRICKABLE_USER_AGENT', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36') + + +def get_peeron_download_delay(): + """Get the delay in milliseconds between Peeron page downloads from config""" + return current_app.config.get('PEERON_DOWNLOAD_DELAY', 1000) + + +def get_min_image_size(): + """Get the minimum image size for valid Peeron instruction pages from config""" + return current_app.config.get('PEERON_MIN_IMAGE_SIZE', 100) + + +def get_peeron_instruction_url(set_number: str, version_number: str): + """Get the Peeron instruction page URL using the configured pattern""" + pattern = current_app.config.get('PEERON_INSTRUCTION_PATTERN', 'http://peeron.com/scans/{set_number}-{version_number}') + return pattern.format(set_number=set_number, version_number=version_number) + + +def get_peeron_thumbnail_url(set_number: str, version_number: str): + """Get the Peeron thumbnail base URL using the configured 
pattern""" + pattern = current_app.config.get('PEERON_THUMBNAIL_PATTERN', 'http://belay.peeron.com/thumbs/{set_number}-{version_number}/') + return pattern.format(set_number=set_number, version_number=version_number) + + +def get_peeron_scan_url(set_number: str, version_number: str): + """Get the Peeron scan base URL using the configured pattern""" + pattern = current_app.config.get('PEERON_SCAN_PATTERN', 'http://belay.peeron.com/scans/{set_number}-{version_number}/') + return pattern.format(set_number=set_number, version_number=version_number) + + +def create_peeron_scraper(): + """Create a cloudscraper instance configured for Peeron""" + scraper = cloudscraper.create_scraper() + scraper.headers.update({ + "User-Agent": get_peeron_user_agent() + }) + return scraper + + +def get_peeron_cache_dir(): + """Get the base directory for Peeron caching""" + static_dir = Path(current_app.static_folder) + cache_dir = static_dir / 'images' / 'peeron_cache' + cache_dir.mkdir(parents=True, exist_ok=True) + return cache_dir + + +def get_set_cache_dir(set_number: str, version_number: str) -> tuple[Path, Path]: + """Get cache directories for a specific set""" + base_cache_dir = get_peeron_cache_dir() + set_cache_key = f"{set_number}-{version_number}" + + full_cache_dir = base_cache_dir / 'full' / set_cache_key + thumb_cache_dir = base_cache_dir / 'thumbs' / set_cache_key + + full_cache_dir.mkdir(parents=True, exist_ok=True) + thumb_cache_dir.mkdir(parents=True, exist_ok=True) + + return full_cache_dir, thumb_cache_dir + + +def cache_full_image_and_generate_thumbnail(image_url: str, page_number: str, set_number: str, version_number: str, session=None) -> tuple[str | None, str | None]: + """ + Download and cache full-size image, then generate a thumbnail preview. + Uses the full-size scan URLs from Peeron. + Returns (cached_image_path, thumbnail_url) or (None, None) if caching fails. 
+ """ + try: + full_cache_dir, thumb_cache_dir = get_set_cache_dir(set_number, version_number) + + full_filename = f"{page_number}.jpg" + thumb_filename = f"{page_number}.jpg" + full_cache_path = full_cache_dir / full_filename + thumb_cache_path = thumb_cache_dir / thumb_filename + + # Return existing cached files if they exist + if full_cache_path.exists() and thumb_cache_path.exists(): + set_cache_key = f"{set_number}-{version_number}" + thumbnail_url = url_for('static', filename=f'images/peeron_cache/thumbs/{set_cache_key}/{thumb_filename}') + return str(full_cache_path), thumbnail_url + + # Download the full-size image using provided session or create new one + if session is None: + session = create_peeron_scraper() + response = session.get(image_url, timeout=30) + + if response.status_code == 200 and len(response.content) > 0: + # Validate it's actually an image by checking minimum size + min_size = get_min_image_size() + if len(response.content) < min_size: + logger.warning(f"Image too small, skipping cache: {image_url}") + return None, None + + # Write full-size image to cache + with open(full_cache_path, 'wb') as f: + f.write(response.content) + + logger.debug(f"Cached full image: {image_url} -> {full_cache_path}") + + # Generate thumbnail from the cached full image + try: + from PIL import Image + with Image.open(full_cache_path) as img: + # Create thumbnail (max 150px on longest side to match template) + img.thumbnail((150, 150), Image.Resampling.LANCZOS) + img.save(thumb_cache_path, 'JPEG', quality=85) + + logger.debug(f"Generated thumbnail: {full_cache_path} -> {thumb_cache_path}") + + set_cache_key = f"{set_number}-{version_number}" + thumbnail_url = url_for('static', filename=f'images/peeron_cache/thumbs/{set_cache_key}/{thumb_filename}') + return str(full_cache_path), thumbnail_url + + except Exception as thumb_error: + logger.error(f"Failed to generate thumbnail for {page_number}: {thumb_error}") + # Clean up the full image if thumbnail generation 
failed + if full_cache_path.exists(): + full_cache_path.unlink() + return None, None + else: + logger.warning(f"Failed to download full image: {image_url}") + return None, None + + except Exception as e: + logger.error(f"Error caching full image {image_url}: {e}") + return None, None + + +def clear_set_cache(set_number: str, version_number: str) -> int: + """ + Clear all cached files for a specific set after PDF generation. + Returns the number of files deleted. + """ + try: + full_cache_dir, thumb_cache_dir = get_set_cache_dir(set_number, version_number) + deleted_count = 0 + + # Delete full images + if full_cache_dir.exists(): + for cache_file in full_cache_dir.glob('*.jpg'): + try: + cache_file.unlink() + deleted_count += 1 + logger.debug(f"Deleted cached full image: {cache_file}") + except OSError as e: + logger.warning(f"Failed to delete cache file {cache_file}: {e}") + + # Remove directory if empty + try: + full_cache_dir.rmdir() + except OSError: + pass # Directory not empty or other error + + # Delete thumbnails + if thumb_cache_dir.exists(): + for cache_file in thumb_cache_dir.glob('*.jpg'): + try: + cache_file.unlink() + deleted_count += 1 + logger.debug(f"Deleted cached thumbnail: {cache_file}") + except OSError as e: + logger.warning(f"Failed to delete cache file {cache_file}: {e}") + + # Remove directory if empty + try: + thumb_cache_dir.rmdir() + except OSError: + pass # Directory not empty or other error + + # Try to remove set directory if empty + try: + set_cache_key = f"{set_number}-{version_number}" + full_cache_dir.parent.rmdir() if full_cache_dir.parent.name == set_cache_key else None + thumb_cache_dir.parent.rmdir() if thumb_cache_dir.parent.name == set_cache_key else None + except OSError: + pass # Directory not empty or other error + + logger.info(f"Set cache cleanup completed for {set_number}-{version_number}: {deleted_count} files deleted") + return deleted_count + + except Exception as e: + logger.error(f"Error during set cache cleanup 
for {set_number}-{version_number}: {e}") + return 0 + + +def clear_old_cache(max_age_days: int = 7) -> int: + """ + Clear old cache files across all sets. + Returns the number of files deleted. + """ + try: + base_cache_dir = get_peeron_cache_dir() + if not base_cache_dir.exists(): + return 0 + + deleted_count = 0 + max_age_seconds = max_age_days * 24 * 60 * 60 + current_time = time.time() + + # Clean both full and thumbs directories + for cache_type in ['full', 'thumbs']: + cache_type_dir = base_cache_dir / cache_type + if cache_type_dir.exists(): + for set_dir in cache_type_dir.iterdir(): + if set_dir.is_dir(): + for cache_file in set_dir.glob('*.jpg'): + file_age = current_time - os.path.getmtime(cache_file) + if file_age > max_age_seconds: + try: + cache_file.unlink() + deleted_count += 1 + logger.debug(f"Deleted old cache file: {cache_file}") + except OSError as e: + logger.warning(f"Failed to delete cache file {cache_file}: {e}") + + # Remove empty directories + try: + if not any(set_dir.iterdir()): + set_dir.rmdir() + except OSError: + pass + + logger.info(f"Old cache cleanup completed: {deleted_count} files deleted") + return deleted_count + + except Exception as e: + logger.error(f"Error during old cache cleanup: {e}") + return 0 + + +class PeeronPage(NamedTuple): + """Represents a single instruction page from Peeron""" + page_number: str + original_image_url: str # Original Peeron full-size image URL + cached_full_image_path: str # Local full-size cached image path + cached_thumbnail_url: str # Local thumbnail URL for preview + alt_text: str + rotation: int = 0 # Rotation in degrees (0, 90, 180, 270) + + +# Peeron instruction scraper +class PeeronInstructions(object): + socket: 'BrickSocket | None' + set_number: str + version_number: str + pages: list[PeeronPage] + + def __init__( + self, + set_number: str, + version_number: str = '1', + /, + *, + socket: 'BrickSocket | None' = None, + ): + # Save the socket + self.socket = socket + + # Parse set number 
(handle both "4011" and "4011-1" formats) + if '-' in set_number: + parts = set_number.split('-', 1) + self.set_number = parts[0] + self.version_number = parts[1] if len(parts) > 1 else '1' + else: + self.set_number = set_number + self.version_number = version_number + + # Placeholder for pages + self.pages = [] + + # Check if instructions exist on Peeron (lightweight) + def exists(self, /) -> bool: + """Check if the set exists on Peeron without caching thumbnails""" + try: + base_url = get_peeron_instruction_url(self.set_number, self.version_number) + scraper = create_peeron_scraper() + response = scraper.get(base_url) + + if response.status_code != 200: + return False + + soup = BeautifulSoup(response.text, 'html.parser') + + # Check for "Browse instruction library" header (set not found) + if soup.find('h1', string="Browse instruction library"): + return False + + # Look for thumbnail images to confirm instructions exist + thumbnails = soup.select('table[cellspacing="5"] a img[src^="http://belay.peeron.com/thumbs/"]') + return len(thumbnails) > 0 + + except Exception: + return False + + # Find all available instruction pages on Peeron + def find_pages(self, /) -> list[PeeronPage]: + """ + Scrape Peeron's HTML and return a list of available instruction pages. + Similar to BrickInstructions.find_instructions() but for Peeron. 
+ """ + base_url = get_peeron_instruction_url(self.set_number, self.version_number) + thumb_base_url = get_peeron_thumbnail_url(self.set_number, self.version_number) + scan_base_url = get_peeron_scan_url(self.set_number, self.version_number) + + logger.debug(f"[find_pages] fetching HTML from {base_url!r}") + + # Set up session with persistent cookies for Peeron (like working dl_peeron.py) + scraper = create_peeron_scraper() + + # Download the main HTML page to establish session and cookies + try: + logger.debug(f"[find_pages] Establishing session by visiting: {base_url}") + response = scraper.get(base_url) + logger.debug(f"[find_pages] Main page visit: HTTP {response.status_code}") + if response.status_code != 200: + raise ErrorException(f'Failed to load Peeron page for {self.set_number}-{self.version_number}. HTTP {response.status_code}') + except requests.exceptions.RequestException as e: + raise ErrorException(f'Failed to connect to Peeron: {e}') + + # Parse HTML to locate instruction pages + soup = BeautifulSoup(response.text, 'html.parser') + + # Check for "Browse instruction library" header (set not found) + if soup.find('h1', string="Browse instruction library"): + raise ErrorException(f'Set {self.set_number}-{self.version_number} not found on Peeron') + + # Locate all thumbnail images in the expected table structure + # Use the configured thumbnail pattern to build the expected URL prefix + thumb_base_url = get_peeron_thumbnail_url(self.set_number, self.version_number) + thumbnails = soup.select(f'table[cellspacing="5"] a img[src^="{thumb_base_url}"]') + + if not thumbnails: + raise ErrorException(f'No instruction pages found for {self.set_number}-{self.version_number} on Peeron') + + pages: list[PeeronPage] = [] + total_thumbnails = len(thumbnails) + + # Initialize progress if socket is available + if self.socket: + self.socket.progress_total = total_thumbnails + self.socket.progress_count = 0 + self.socket.progress(message=f"Starting to cache 
{total_thumbnails} full images") + + for idx, img in enumerate(thumbnails, 1): + thumb_url = img['src'] + + # Extract the page number from the thumbnail URL + page_number = thumb_url.split('/')[-2] + + # Build the full-size scan URL using the page number + full_size_url = f"{scan_base_url}{page_number}/" + + logger.debug(f"[find_pages] Page {page_number}: thumb={thumb_url}, full_size={full_size_url}") + + # Create alt text for the page + alt_text = f"LEGO Instructions {self.set_number}-{self.version_number} Page {page_number}" + + # Report progress if socket is available + if self.socket: + self.socket.progress_count = idx + self.socket.progress(message=f"Caching full image {idx} of {total_thumbnails}") + + # Cache the full-size image and generate thumbnail preview using established session + cached_full_path, cached_thumb_url = cache_full_image_and_generate_thumbnail( + full_size_url, page_number, self.set_number, self.version_number, session=scraper + ) + + # Skip this page if caching failed + if not cached_full_path or not cached_thumb_url: + logger.warning(f"[find_pages] Skipping page {page_number} due to caching failure") + continue + + page = PeeronPage( + page_number=page_number, + original_image_url=full_size_url, + cached_full_image_path=cached_full_path, + cached_thumbnail_url=cached_thumb_url, + alt_text=alt_text + ) + pages.append(page) + + # Cache the pages for later use + self.pages = pages + + logger.debug(f"[find_pages] found {len(pages)} pages for {self.set_number}-{self.version_number}") + return pages + + # Find instructions with fallback to Peeron + @staticmethod + def find_instructions_with_peeron_fallback(set: str, /) -> tuple[list[tuple[str, str]], list[PeeronPage] | None]: + """ + Enhanced version of BrickInstructions.find_instructions() that falls back to Peeron. + Returns (rebrickable_instructions, peeron_pages). + If rebrickable_instructions is empty, peeron_pages will contain Peeron data. 
+ """ + from .instructions import BrickInstructions + + # First try Rebrickable + try: + rebrickable_instructions = BrickInstructions.find_instructions(set) + return rebrickable_instructions, None + except ErrorException as e: + logger.info(f"Rebrickable failed for {set}: {e}. Trying Peeron fallback...") + + # Fallback to Peeron + try: + peeron = PeeronInstructions(set) + peeron_pages = peeron.find_pages() + return [], peeron_pages + except ErrorException as peeron_error: + # Both failed, re-raise original Rebrickable error + logger.info(f"Peeron also failed for {set}: {peeron_error}") + raise e from peeron_error \ No newline at end of file diff --git a/bricktracker/peeron_pdf.py b/bricktracker/peeron_pdf.py new file mode 100644 index 0000000..5db9070 --- /dev/null +++ b/bricktracker/peeron_pdf.py @@ -0,0 +1,200 @@ +import logging +import os +import tempfile +import time +from typing import Any, TYPE_CHECKING + +import cloudscraper +from flask import current_app +from PIL import Image + +from .exceptions import DownloadException, ErrorException +from .instructions import BrickInstructions +from .peeron_instructions import PeeronPage, get_min_image_size, get_peeron_download_delay, get_peeron_instruction_url, create_peeron_scraper +if TYPE_CHECKING: + from .socket import BrickSocket + +logger = logging.getLogger(__name__) + + +# PDF generator for Peeron instruction pages +class PeeronPDF(object): + socket: 'BrickSocket' + set_number: str + version_number: str + pages: list[PeeronPage] + filename: str + + def __init__( + self, + set_number: str, + version_number: str, + pages: list[PeeronPage], + /, + *, + socket: 'BrickSocket', + ): + # Save the socket + self.socket = socket + + # Save set information + self.set_number = set_number + self.version_number = version_number + self.pages = pages + + # Generate filename following BrickTracker conventions + self.filename = f"{set_number}-{version_number}_peeron.pdf" + + # Download pages and create PDF + def create_pdf(self, 
/) -> None: + """ + Downloads selected Peeron pages and merges them into a PDF. + Uses progress updates via socket similar to BrickInstructions.download() + """ + try: + target_path = self._get_target_path() + + # Skip if we already have it + if os.path.isfile(target_path): + # Create BrickInstructions instance to get PDF URL + instructions = BrickInstructions(self.filename) + pdf_url = instructions.url() + return self.socket.complete( + message=f'File {self.filename} already exists, skipped - Open PDF' + ) + + # Set up progress tracking + total_pages = len(self.pages) + self.socket.update_total(total_pages) + self.socket.progress_count = 0 + self.socket.progress(message=f"Starting PDF creation from {total_pages} cached pages") + + # Use cached images directly - no downloads needed! + cached_files_with_rotation = [] + missing_pages = [] + + for i, page in enumerate(self.pages): + # Check if cached file exists + if os.path.isfile(page.cached_full_image_path): + cached_files_with_rotation.append((page.cached_full_image_path, page.rotation)) + + # Update progress + self.socket.progress_count += 1 + self.socket.progress( + message=f"Processing cached page {page.page_number} ({i + 1}/{total_pages})" + ) + else: + missing_pages.append(page.page_number) + logger.warning(f"Cached image missing for page {page.page_number}: {page.cached_full_image_path}") + + if not cached_files_with_rotation: + raise DownloadException(f"No cached images available for set {self.set_number}-{self.version_number}. Cache may have been cleared.") + + elif len(cached_files_with_rotation) < total_pages: + # Partial success + error_msg = f"Only found {len(cached_files_with_rotation)}/{total_pages} cached images." + if missing_pages: + error_msg += f" Missing pages: {', '.join(missing_pages)}." 
+ logger.warning(error_msg) + + # Create PDF from cached images with rotation + self._create_pdf_from_images(cached_files_with_rotation, target_path) + + # Success + logger.info(f"Created PDF {self.filename} with {len(cached_files_with_rotation)} pages") + + # Create BrickInstructions instance to get PDF URL + instructions = BrickInstructions(self.filename) + pdf_url = instructions.url() + + self.socket.complete( + message=f'PDF {self.filename} created with {len(cached_files_with_rotation)} pages - Open PDF' + ) + + # Clean up set cache after successful PDF creation + try: + from .peeron_instructions import clear_set_cache + deleted_count = clear_set_cache(self.set_number, self.version_number) + if deleted_count > 0: + logger.info(f"[create_pdf] Cleaned up {deleted_count} cache files for set {self.set_number}-{self.version_number}") + except Exception as e: + logger.warning(f"[create_pdf] Failed to clean set cache: {e}") + + except Exception as e: + logger.error(f"Error creating PDF {self.filename}: {e}") + self.socket.fail( + message=f"Error creating PDF {self.filename}: {e}" + ) + + + # Create PDF from downloaded images + def _create_pdf_from_images(self, image_paths_and_rotations: list[tuple[str, int]], output_path: str, /) -> None: + """Create a PDF from a list of image files with their rotations""" + try: + # Import FPDF (should be available from requirements) + from fpdf import FPDF + except ImportError: + raise ErrorException("FPDF library not available. 
Install with: pip install fpdf2") + + pdf = FPDF() + + for i, (img_path, rotation) in enumerate(image_paths_and_rotations): + try: + # Open image and apply rotation if needed + with Image.open(img_path) as image: + # Apply rotation if specified + if rotation != 0: + # PIL rotation is counter-clockwise, so we negate for clockwise rotation + image = image.rotate(-rotation, expand=True) + + width, height = image.size + + # Add page with image dimensions (convert pixels to mm) + # 1 pixel = 0.264583 mm (assuming 96 DPI) + page_width = width * 0.264583 + page_height = height * 0.264583 + + pdf.add_page(format=(page_width, page_height)) + + # Save rotated image to temporary file for FPDF + temp_rotated_path = None + if rotation != 0: + import tempfile + temp_fd, temp_rotated_path = tempfile.mkstemp(suffix='.jpg', prefix=f'peeron_rotated_{i}_') + try: + os.close(temp_fd) # Close file descriptor, we'll use the path + image.save(temp_rotated_path, 'JPEG', quality=95) + pdf.image(temp_rotated_path, x=0, y=0, w=page_width, h=page_height) + finally: + # Clean up rotated temp file + if temp_rotated_path and os.path.exists(temp_rotated_path): + os.remove(temp_rotated_path) + else: + pdf.image(img_path, x=0, y=0, w=page_width, h=page_height) + + # Update progress + progress_msg = f"Processing page {i + 1}/{len(image_paths_and_rotations)} into PDF" + if rotation != 0: + progress_msg += f" (rotated {rotation}°)" + self.socket.progress(message=progress_msg) + + except Exception as e: + logger.warning(f"Failed to add image {img_path} to PDF: {e}") + continue + + # Save the PDF + pdf.output(output_path) + + # Get target file path + def _get_target_path(self, /) -> str: + """Get the full path where the PDF should be saved""" + instructions_folder = os.path.join( + current_app.static_folder, # type: ignore + current_app.config['INSTRUCTIONS_FOLDER'] + ) + return os.path.join(instructions_folder, self.filename) + + # Create BrickInstructions instance for the generated PDF + def 
get_instructions(self, /) -> BrickInstructions: + """Return a BrickInstructions instance for the generated PDF""" + return BrickInstructions(self.filename) \ No newline at end of file diff --git a/bricktracker/socket.py b/bricktracker/socket.py index 10c3358..da3fca0 100644 --- a/bricktracker/socket.py +++ b/bricktracker/socket.py @@ -6,6 +6,8 @@ from flask_socketio import SocketIO from .instructions import BrickInstructions from .instructions_list import BrickInstructionsList +from .peeron_instructions import PeeronInstructions, PeeronPage +from .peeron_pdf import PeeronPDF from .set import BrickSet from .socket_decorator import authenticated_socket, rebrickable_socket from .sql import close as sql_close @@ -18,8 +20,10 @@ MESSAGES: Final[dict[str, str]] = { 'CONNECT': 'connect', 'DISCONNECT': 'disconnect', 'DOWNLOAD_INSTRUCTIONS': 'download_instructions', + 'DOWNLOAD_PEERON_PAGES': 'download_peeron_pages', 'FAIL': 'fail', 'IMPORT_SET': 'import_set', + 'LOAD_PEERON_PAGES': 'load_peeron_pages', 'LOAD_SET': 'load_set', 'PROGRESS': 'progress', 'SET_LOADED': 'set_loaded', @@ -106,6 +110,84 @@ class BrickSocket(object): BrickInstructionsList(force=True) + @self.socket.on(MESSAGES['LOAD_PEERON_PAGES'], namespace=self.namespace) # noqa: E501 + def load_peeron_pages(data: dict[str, Any], /) -> None: + logger.debug('Socket: LOAD_PEERON_PAGES={data} (from: {fr})'.format( + data=data, fr=request.remote_addr)) + + try: + set_number = data.get('set', '') + if not set_number: + self.fail(message="Set number is required") + return + + # Create Peeron instructions instance with socket for progress reporting + peeron = PeeronInstructions(set_number, socket=self) + + # Find pages (this will report progress for thumbnail caching) + pages = peeron.find_pages() + + # Complete the operation (JavaScript will handle redirect) + self.complete(message=f"Found {len(pages)} instruction pages on Peeron") + + except Exception as e: + logger.error(f"Error in load_peeron_pages: {e}") + 
self.fail(message=f"Error loading Peeron pages: {e}") + + @self.socket.on(MESSAGES['DOWNLOAD_PEERON_PAGES'], namespace=self.namespace) # noqa: E501 + @authenticated_socket(self) + def download_peeron_pages(data: dict[str, Any], /) -> None: + logger.debug('Socket: DOWNLOAD_PEERON_PAGES={data} (from: {fr})'.format( + data=data, + fr=request.sid, # type: ignore + )) + + try: + # Extract data from the request + set_number = data.get('set', '') + pages_data = data.get('pages', []) + + if not set_number: + raise ValueError("Set number is required") + + if not pages_data: + raise ValueError("No pages selected") + + # Parse set number + if '-' in set_number: + parts = set_number.split('-', 1) + set_num = parts[0] + version_num = parts[1] if len(parts) > 1 else '1' + else: + set_num = set_number + version_num = '1' + + # Convert page data to PeeronPage objects + pages = [] + for page_data in pages_data: + page = PeeronPage( + page_number=page_data.get('page_number', ''), + original_image_url=page_data.get('original_image_url', ''), + cached_full_image_path=page_data.get('cached_full_image_path', ''), + cached_thumbnail_url='', # Not needed for PDF generation + alt_text=page_data.get('alt_text', ''), + rotation=page_data.get('rotation', 0) + ) + pages.append(page) + + # Create PDF generator and start download + pdf_generator = PeeronPDF(set_num, version_num, pages, socket=self) + pdf_generator.create_pdf() + + # Note: Cache cleanup is handled automatically by pdf_generator.create_pdf() + + # Refresh instructions list to include new PDF + BrickInstructionsList(force=True) + + except Exception as e: + logger.error(f"Error in download_peeron_pages: {e}") + self.fail(message=f"Error downloading Peeron pages: {e}") + @self.socket.on(MESSAGES['IMPORT_SET'], namespace=self.namespace) @rebrickable_socket(self) def import_set(data: dict[str, Any], /) -> None: diff --git a/bricktracker/views/instructions.py b/bricktracker/views/instructions.py index 2c2138a..9d41ac5 100644 --- 
a/bricktracker/views/instructions.py +++ b/bricktracker/views/instructions.py @@ -14,6 +14,7 @@ from .exceptions import exception_handler from ..instructions import BrickInstructions from ..instructions_list import BrickInstructionsList from ..parser import parse_set +from ..peeron_instructions import PeeronInstructions from ..socket import MESSAGES from .upload import upload_helper @@ -24,6 +25,22 @@ instructions_page = Blueprint( ) +def _render_peeron_select_page(set: str) -> str: + """Helper function to render the Peeron page selection interface with cached thumbnails.""" + peeron = PeeronInstructions(set) + peeron_pages = peeron.find_pages() # This will use the cached thumbnails + current_app.logger.debug(f"[peeron_loaded] Found {len(peeron_pages)} pages for {set}") + return render_template( + 'peeron_select.html', + download=True, + pages=peeron_pages, + set=set, + path=current_app.config['SOCKET_PATH'], + namespace=current_app.config['SOCKET_NAMESPACE'], + messages=MESSAGES + ) + + # Index @instructions_page.route('/', methods=['GET']) @exception_handler(__file__) @@ -141,6 +158,10 @@ def download() -> str: except Exception: set = '' + # Check if this is a redirect after Peeron pages were loaded + if request.args.get('peeron_loaded'): + return _render_peeron_select_page(set) + return render_template( 'instructions.html', download=True, @@ -160,12 +181,50 @@ def do_download() -> str: except Exception: set = '' - return render_template( - 'instructions.html', - download=True, - instructions=BrickInstructions.find_instructions(set), - set=set, - path=current_app.config['SOCKET_PATH'], - namespace=current_app.config['SOCKET_NAMESPACE'], - messages=MESSAGES - ) + # Check if this is a redirect after Peeron pages were loaded + if request.args.get('peeron_loaded'): + return _render_peeron_select_page(set) + + # Try Rebrickable first + try: + from .instructions import BrickInstructions + rebrickable_instructions = BrickInstructions.find_instructions(set) + # Standard 
Rebrickable instructions found + return render_template( + 'instructions.html', + download=True, + instructions=rebrickable_instructions, + set=set, + path=current_app.config['SOCKET_PATH'], + namespace=current_app.config['SOCKET_NAMESPACE'], + messages=MESSAGES + ) + except Exception: + # Rebrickable failed, check if Peeron has instructions (without caching thumbnails yet) + try: + peeron = PeeronInstructions(set) + # Just check if pages exist, don't cache thumbnails yet + if peeron.exists(): + # Peeron has instructions - show loading interface + return render_template( + 'peeron_select.html', + download=True, + loading_peeron=True, # Flag to show loading state + set=set, + path=current_app.config['SOCKET_PATH'], + namespace=current_app.config['SOCKET_NAMESPACE'], + messages=MESSAGES + ) + else: + raise Exception("Not found on Peeron either") + except Exception: + return render_template( + 'instructions.html', + download=True, + instructions=[], + set=set, + error='No instructions found on Rebrickable or Peeron', + path=current_app.config['SOCKET_PATH'], + namespace=current_app.config['SOCKET_NAMESPACE'], + messages=MESSAGES + ) diff --git a/requirements.txt b/requirements.txt index 7cd3644..f0e433d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,5 @@ requests tzdata bs4 cloudscraper +fpdf2 +pillow \ No newline at end of file
diff --git a/static/scripts/socket/peeron.js b/static/scripts/socket/peeron.js
new file mode 100644
index 0000000..9a94104
--- /dev/null
+++ b/static/scripts/socket/peeron.js
@@ -0,0 +1,232 @@
+// Peeron Socket class
+//
+// Wires the Peeron page selection form (download button, page checkboxes,
+// select-all and rotate buttons) to the socket and emits the
+// DOWNLOAD_PEERON_PAGES message for the pages the user ticked.
+class BrickPeeronSocket extends BrickSocket {
+    // id is the element id of the download button; the page checkboxes are
+    // looked up inside the element whose id is `${id}-files`.
+    // path, namespace and messages are forwarded unchanged to BrickSocket.
+    constructor(id, path, namespace, messages) {
+        super(id, path, namespace, messages, true);
+
+        // Form elements (built based on the initial id)
+        this.html_button = document.getElementById(id);
+        this.html_files = document.getElementById(`${id}-files`);
+
+        if (this.html_button) {
+            this.html_button.addEventListener("click", () => {
+                this.execute();
+            });
+        }
+
+        // Setup select all button
+        this.setup_select_all_button();
+
+        // Setup rotation buttons
+        this.setup_rotation_buttons();
+
+        // Setup the socket
+        this.setup();
+    }
+
+    // Make the 'peeron-select-all' button flip every page checkbox at once
+    setup_select_all_button() {
+        const selectAllButton = document.getElementById('peeron-select-all');
+        if (selectAllButton) {
+            selectAllButton.addEventListener('click', () => {
+                const checkboxes = this.get_files();
+                // State before the click: everything ticked means "deselect"
+                const allChecked = checkboxes.every(cb => cb.checked);
+                checkboxes.forEach(cb => cb.checked = !allChecked);
+
+                // Update button text and icon
+                if (allChecked) {
+                    selectAllButton.innerHTML = ' Select All';
+                } else {
+                    selectAllButton.innerHTML = ' Deselect All';
+                }
+            });
+        }
+    }
+
+    // Make every '.peeron-rotate-btn' rotate its preview by 90 degrees per click
+    setup_rotation_buttons() {
+        document.querySelectorAll('.peeron-rotate-btn').forEach(button => {
+            button.addEventListener('click', (e) => {
+                e.preventDefault(); // Prevent label click
+                e.stopPropagation(); // Stop event bubbling
+
+                const targetId = button.getAttribute('data-target');
+                const checkboxId = button.getAttribute('data-checkbox');
+                const targetImg = document.getElementById(targetId);
+                const checkbox = document.getElementById(checkboxId);
+
+                if (targetImg && checkbox) {
+                    // Explicit radix: the attribute holds a decimal angle
+                    let currentRotation = parseInt(button.getAttribute('data-rotation') || '0', 10);
+                    currentRotation = (currentRotation + 90) % 360;
+
+                    // Update image rotation
+                    targetImg.style.transform = `rotate(${currentRotation}deg)`;
+                    button.setAttribute('data-rotation', currentRotation.toString());
+
+                    // Store rotation in checkbox data for later use
+                    checkbox.setAttribute('data-rotation', currentRotation.toString());
+
+                    // Update the rotation icon to indicate current state
+                    const icon = button.querySelector('i');
+                    if (icon) {
+                        // Rotate the icon to match the image rotation
+                        icon.style.transform = `rotate(${currentRotation}deg)`;
+                    }
+                }
+            });
+        });
+    }
+
+    // Upon receiving a complete message
+    complete(data) {
+        super.complete(data);
+
+        // Clear progress display after completion
+        if (this.html_progress_message) {
+            this.html_progress_message.classList.add("d-none");
+            this.html_progress_message.textContent = "";
+        }
+
+        if (this.html_count) {
+            this.html_count.classList.add("d-none");
+            this.html_count.textContent = "";
+        }
+
+        // Ensure spinner is hidden
+        this.spinner(false);
+
+        this.toggle(true);
+    }
+
+    // Execute the action
+    execute() {
+        if (!this.disabled && this.socket !== undefined && this.socket.connected) {
+            this.toggle(false);
+
+            this.download_peeron_pages();
+        }
+    }
+
+    // Get the list of checkboxes describing files
+    // (only the ticked ones when `checked` is true)
+    get_files(checked=false) {
+        let files = [];
+
+        if (this.html_files) {
+            files = [...this.html_files.querySelectorAll('input[type="checkbox"]')];
+
+            if (checked) {
+                files = files.filter(file => file.checked);
+            }
+        }
+
+        return files;
+    }
+
+    // Download Peeron pages
+    download_peeron_pages() {
+        if (this.html_files) {
+            const selectedFiles = this.get_files(true);
+
+            if (selectedFiles.length === 0) {
+                this.fail({message: "Please select at least one page to download."});
+                this.toggle(true);
+                return;
+            }
+
+            const pages = selectedFiles.map(checkbox => ({
+                page_number: checkbox.getAttribute('data-page-number'),
+                original_image_url: checkbox.getAttribute('data-original-image-url'),
+                cached_full_image_path: checkbox.getAttribute('data-cached-full-image-path'),
+                alt_text: checkbox.getAttribute('data-alt-text'),
+                rotation: parseInt(checkbox.getAttribute('data-rotation') || '0', 10)
+            }));
+
+            this.clear();
+            this.spinner(true);
+
+            const setElement = document.querySelector('input[name="download-set"]');
+            const set = setElement ? setElement.value : '';
+
+            this.socket.emit(this.messages.DOWNLOAD_PEERON_PAGES, {
+                set: set,
+                pages: pages,
+                total: pages.length,
+                current: 0
+            });
+        } else {
+            this.fail({message: "Could not find the list of pages to download"});
+            // execute() disabled the form before calling us: re-enable it,
+            // as the empty-selection path above does
+            this.toggle(true);
+        }
+    }
+
+    // Toggle clicking on the button, or sending events
+    toggle(enabled) {
+        super.toggle(enabled);
+
+        if (this.html_files) {
+            this.get_files().forEach(el => el.disabled = !enabled);
+        }
+
+        if (this.html_button) {
+            this.html_button.disabled = !enabled;
+        }
+    }
+}
+
+// Simple Peeron page loader using standard socket pattern
+//
+// Emits LOAD_PEERON_PAGES for one set once the socket is connected, then
+// navigates to the current path with `set` and `peeron_loaded=1` on completion.
+class BrickPeeronPageLoader extends BrickSocket {
+    constructor(set, path, namespace, messages) {
+        // Use 'peeron-loader' as the ID for socket elements
+        super('peeron-loader', path, namespace, messages, false);
+
+        this.set = set;
+        this.setup();
+
+        // Auto-start loading when connected
+        setTimeout(() => {
+            if (!this.socket) {
+                // No socket to wait on: report it rather than throw a
+                // TypeError from inside the timer callback
+                this.fail({message: "Could not connect to load the Peeron pages"});
+                return;
+            }
+
+            if (this.socket.connected) {
+                this.loadPages();
+            } else {
+                this.socket.on('connect', () => this.loadPages());
+            }
+        }, 100);
+    }
+
+    // Emit the LOAD_PEERON_PAGES message for this.set
+    loadPages() {
+        this.socket.emit(this.messages.LOAD_PEERON_PAGES, {
+            set: this.set
+        });
+    }
+
+    // Override complete to redirect when done
+    complete(data) {
+        super.complete(data);
+        // Redirect to show the pages selection interface
+        const params = new URLSearchParams();
+        params.set('set', this.set);
+        params.set('peeron_loaded', '1');
+        window.location.href = `${window.location.pathname}?${params.toString()}`;
+    }
+}
\ No newline at end of file
diff --git a/templates/base.html b/templates/base.html index 286ca5d..c548e8f 100644 --- a/templates/base.html +++ b/templates/base.html @@ -105,6 +105,9 @@ {% if request.endpoint == 'set.list' %} {% endif %} + {% if request.endpoint == 'instructions.download' or request.endpoint == 'instructions.do_download' %} + + {% endif %} \ No newline at end of file diff --git a/templates/instructions/peeron_socket.html b/templates/instructions/peeron_socket.html new file mode 100644 index 0000000..79449c7 ---
/dev/null +++ b/templates/instructions/peeron_socket.html @@ -0,0 +1,10 @@ + \ No newline at end of file diff --git a/templates/peeron_select.html b/templates/peeron_select.html new file mode 100644 index 0000000..fed47a9 --- /dev/null +++ b/templates/peeron_select.html @@ -0,0 +1,144 @@ +{% extends 'base.html' %} + +{% block title %} - Download instructions from Peeron{% endblock %} + +{% block main %} +
+ Progress + + + Loading... + +
++ Progress + + + Loading... + +
+