Updated Peeron download logic to cache pages per set and report progress over the socket.
This commit is contained in:
@@ -61,108 +61,205 @@ def create_peeron_scraper():
|
||||
return scraper
|
||||
|
||||
|
||||
def get_thumbnail_cache_dir():
|
||||
"""Get the directory for thumbnail caching"""
|
||||
def get_peeron_cache_dir():
    """Return the root directory of the Peeron cache, creating it if needed.

    The directory is ``images/peeron_cache`` inside the Flask application's
    static folder.
    """
    peeron_root = Path(current_app.static_folder).joinpath('images', 'peeron_cache')
    # Idempotent: missing parents are created, an existing directory is fine.
    peeron_root.mkdir(parents=True, exist_ok=True)
    return peeron_root
|
||||
|
||||
|
||||
def get_cached_thumbnail_filename(thumbnail_url: str) -> str:
    """Build the cache filename used for a thumbnail URL.

    The name is the MD5 hex digest of the URL, so no URL character ever
    reaches the filesystem, followed by an image extension. The extension is
    the lower-cased text after the last ``.`` of the URL when that is one of
    jpg / jpeg / png / gif, and ``.jpg`` in every other case.
    """
    digest = hashlib.md5(thumbnail_url.encode()).hexdigest()

    # rpartition leaves `dot` empty when the URL contains no '.' at all
    _, dot, tail = thumbnail_url.rpartition('.')
    candidate = f".{tail.lower()}"
    known_suffixes = ('.jpg', '.jpeg', '.png', '.gif')

    suffix = candidate if dot and candidate in known_suffixes else '.jpg'
    return f"{digest}{suffix}"
|
||||
def get_set_cache_dir(set_number: str, version_number: str) -> tuple[Path, Path]:
    """Return (and create) the cache directories belonging to one set.

    Both directories live under the Peeron cache root and share the key
    ``<set_number>-<version_number>``: ``full/<key>`` and ``thumbs/<key>``.

    Returns:
        ``(full_cache_dir, thumb_cache_dir)``.
    """
    cache_root = get_peeron_cache_dir()
    key = f"{set_number}-{version_number}"

    full_dir, thumb_dir = (cache_root / kind / key for kind in ('full', 'thumbs'))

    for directory in (full_dir, thumb_dir):
        directory.mkdir(parents=True, exist_ok=True)

    return full_dir, thumb_dir
|
||||
|
||||
|
||||
def cache_thumbnail(thumbnail_url: str) -> str | None:
|
||||
def cache_full_image_and_generate_thumbnail(image_url: str, page_number: str, set_number: str, version_number: str, session=None) -> tuple[str | None, str | None]:
|
||||
"""
|
||||
Download and cache a thumbnail image, return the local URL path.
|
||||
Returns None if caching fails.
|
||||
Download and cache full-size image, then generate a thumbnail preview.
|
||||
Uses the full-size scan URLs from Peeron.
|
||||
Returns (cached_image_path, thumbnail_url) or (None, None) if caching fails.
|
||||
"""
|
||||
try:
|
||||
cache_dir = get_thumbnail_cache_dir()
|
||||
filename = get_cached_thumbnail_filename(thumbnail_url)
|
||||
cache_path = cache_dir / filename
|
||||
full_cache_dir, thumb_cache_dir = get_set_cache_dir(set_number, version_number)
|
||||
|
||||
# Return existing cached file if it exists
|
||||
if cache_path.exists():
|
||||
return url_for('static', filename=f'images/peeron_cache/{filename}')
|
||||
full_filename = f"{page_number}.jpg"
|
||||
thumb_filename = f"{page_number}.jpg"
|
||||
full_cache_path = full_cache_dir / full_filename
|
||||
thumb_cache_path = thumb_cache_dir / thumb_filename
|
||||
|
||||
# Download the thumbnail
|
||||
scraper = create_peeron_scraper()
|
||||
response = scraper.get(thumbnail_url, timeout=10)
|
||||
# Return existing cached files if they exist
|
||||
if full_cache_path.exists() and thumb_cache_path.exists():
|
||||
set_cache_key = f"{set_number}-{version_number}"
|
||||
thumbnail_url = url_for('static', filename=f'images/peeron_cache/thumbs/{set_cache_key}/{thumb_filename}')
|
||||
return str(full_cache_path), thumbnail_url
|
||||
|
||||
# Download the full-size image using provided session or create new one
|
||||
if session is None:
|
||||
session = create_peeron_scraper()
|
||||
response = session.get(image_url, timeout=30)
|
||||
|
||||
if response.status_code == 200 and len(response.content) > 0:
|
||||
# Validate it's actually an image by checking minimum size
|
||||
min_size = get_min_image_size()
|
||||
if len(response.content) < min_size:
|
||||
logger.warning(f"Thumbnail too small, skipping cache: {thumbnail_url}")
|
||||
return None
|
||||
logger.warning(f"Image too small, skipping cache: {image_url}")
|
||||
return None, None
|
||||
|
||||
# Write to cache
|
||||
with open(cache_path, 'wb') as f:
|
||||
# Write full-size image to cache
|
||||
with open(full_cache_path, 'wb') as f:
|
||||
f.write(response.content)
|
||||
|
||||
logger.debug(f"Cached thumbnail: {thumbnail_url} -> {cache_path}")
|
||||
return url_for('static', filename=f'images/peeron_cache/{filename}')
|
||||
logger.debug(f"Cached full image: {image_url} -> {full_cache_path}")
|
||||
|
||||
# Generate thumbnail from the cached full image
|
||||
try:
|
||||
from PIL import Image
|
||||
with Image.open(full_cache_path) as img:
|
||||
# Create thumbnail (max 150px on longest side to match template)
|
||||
img.thumbnail((150, 150), Image.Resampling.LANCZOS)
|
||||
img.save(thumb_cache_path, 'JPEG', quality=85)
|
||||
|
||||
logger.debug(f"Generated thumbnail: {full_cache_path} -> {thumb_cache_path}")
|
||||
|
||||
set_cache_key = f"{set_number}-{version_number}"
|
||||
thumbnail_url = url_for('static', filename=f'images/peeron_cache/thumbs/{set_cache_key}/{thumb_filename}')
|
||||
return str(full_cache_path), thumbnail_url
|
||||
|
||||
except Exception as thumb_error:
|
||||
logger.error(f"Failed to generate thumbnail for {page_number}: {thumb_error}")
|
||||
# Clean up the full image if thumbnail generation failed
|
||||
if full_cache_path.exists():
|
||||
full_cache_path.unlink()
|
||||
return None, None
|
||||
else:
|
||||
logger.warning(f"Failed to download thumbnail: {thumbnail_url}")
|
||||
return None
|
||||
logger.warning(f"Failed to download full image: {image_url}")
|
||||
return None, None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error caching thumbnail {thumbnail_url}: {e}")
|
||||
return None
|
||||
logger.error(f"Error caching full image {image_url}: {e}")
|
||||
return None, None
|
||||
|
||||
|
||||
def clear_thumbnail_cache(max_age_days: int = 30) -> int:
|
||||
def clear_set_cache(set_number: str, version_number: str) -> int:
|
||||
"""
|
||||
Clear old thumbnail cache files.
|
||||
Clear all cached files for a specific set after PDF generation.
|
||||
Returns the number of files deleted.
|
||||
"""
|
||||
try:
|
||||
cache_dir = get_thumbnail_cache_dir()
|
||||
if not cache_dir.exists():
|
||||
full_cache_dir, thumb_cache_dir = get_set_cache_dir(set_number, version_number)
|
||||
deleted_count = 0
|
||||
|
||||
# Delete full images
|
||||
if full_cache_dir.exists():
|
||||
for cache_file in full_cache_dir.glob('*.jpg'):
|
||||
try:
|
||||
cache_file.unlink()
|
||||
deleted_count += 1
|
||||
logger.debug(f"Deleted cached full image: {cache_file}")
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to delete cache file {cache_file}: {e}")
|
||||
|
||||
# Remove directory if empty
|
||||
try:
|
||||
full_cache_dir.rmdir()
|
||||
except OSError:
|
||||
pass # Directory not empty or other error
|
||||
|
||||
# Delete thumbnails
|
||||
if thumb_cache_dir.exists():
|
||||
for cache_file in thumb_cache_dir.glob('*.jpg'):
|
||||
try:
|
||||
cache_file.unlink()
|
||||
deleted_count += 1
|
||||
logger.debug(f"Deleted cached thumbnail: {cache_file}")
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to delete cache file {cache_file}: {e}")
|
||||
|
||||
# Remove directory if empty
|
||||
try:
|
||||
thumb_cache_dir.rmdir()
|
||||
except OSError:
|
||||
pass # Directory not empty or other error
|
||||
|
||||
# Try to remove set directory if empty
|
||||
try:
|
||||
set_cache_key = f"{set_number}-{version_number}"
|
||||
full_cache_dir.parent.rmdir() if full_cache_dir.parent.name == set_cache_key else None
|
||||
thumb_cache_dir.parent.rmdir() if thumb_cache_dir.parent.name == set_cache_key else None
|
||||
except OSError:
|
||||
pass # Directory not empty or other error
|
||||
|
||||
logger.info(f"Set cache cleanup completed for {set_number}-{version_number}: {deleted_count} files deleted")
|
||||
return deleted_count
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during set cache cleanup for {set_number}-{version_number}: {e}")
|
||||
return 0
|
||||
|
||||
|
||||
def clear_old_cache(max_age_days: int = 7) -> int:
|
||||
"""
|
||||
Clear old cache files across all sets.
|
||||
Returns the number of files deleted.
|
||||
"""
|
||||
try:
|
||||
base_cache_dir = get_peeron_cache_dir()
|
||||
if not base_cache_dir.exists():
|
||||
return 0
|
||||
|
||||
deleted_count = 0
|
||||
max_age_seconds = max_age_days * 24 * 60 * 60
|
||||
current_time = time.time()
|
||||
|
||||
for cache_file in cache_dir.glob('*'):
|
||||
if cache_file.is_file():
|
||||
file_age = current_time - os.path.getmtime(cache_file)
|
||||
if file_age > max_age_seconds:
|
||||
try:
|
||||
cache_file.unlink()
|
||||
deleted_count += 1
|
||||
logger.debug(f"Deleted old cache file: {cache_file}")
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to delete cache file {cache_file}: {e}")
|
||||
# Clean both full and thumbs directories
|
||||
for cache_type in ['full', 'thumbs']:
|
||||
cache_type_dir = base_cache_dir / cache_type
|
||||
if cache_type_dir.exists():
|
||||
for set_dir in cache_type_dir.iterdir():
|
||||
if set_dir.is_dir():
|
||||
for cache_file in set_dir.glob('*.jpg'):
|
||||
file_age = current_time - os.path.getmtime(cache_file)
|
||||
if file_age > max_age_seconds:
|
||||
try:
|
||||
cache_file.unlink()
|
||||
deleted_count += 1
|
||||
logger.debug(f"Deleted old cache file: {cache_file}")
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to delete cache file {cache_file}: {e}")
|
||||
|
||||
logger.info(f"Thumbnail cache cleanup completed: {deleted_count} files deleted")
|
||||
# Remove empty directories
|
||||
try:
|
||||
if not any(set_dir.iterdir()):
|
||||
set_dir.rmdir()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
logger.info(f"Old cache cleanup completed: {deleted_count} files deleted")
|
||||
return deleted_count
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during cache cleanup: {e}")
|
||||
logger.error(f"Error during old cache cleanup: {e}")
|
||||
return 0
|
||||
|
||||
|
||||
class PeeronPage(NamedTuple):
|
||||
"""Represents a single instruction page from Peeron"""
|
||||
page_number: str
|
||||
thumbnail_url: str
|
||||
cached_thumbnail_url: str | None # Local cached thumbnail URL
|
||||
image_url: str
|
||||
original_image_url: str # Original Peeron full-size image URL
|
||||
cached_full_image_path: str # Local full-size cached image path
|
||||
cached_thumbnail_url: str # Local thumbnail URL for preview
|
||||
alt_text: str
|
||||
rotation: int = 0 # Rotation in degrees (0, 90, 180, 270)
|
||||
rotation: int = 0 # Rotation in degrees (0, 90, 180, 270)
|
||||
|
||||
|
||||
# Peeron instruction scraper
|
||||
@@ -195,13 +292,28 @@ class PeeronInstructions(object):
|
||||
# Placeholder for pages
|
||||
self.pages = []
|
||||
|
||||
# Check if instructions exist on Peeron
|
||||
# Check if instructions exist on Peeron (lightweight)
|
||||
def exists(self, /) -> bool:
|
||||
"""Check if the set exists on Peeron without downloading pages"""
|
||||
"""Check if the set exists on Peeron without caching thumbnails"""
|
||||
try:
|
||||
pages = self.find_pages()
|
||||
return len(pages) > 0
|
||||
except ErrorException:
|
||||
base_url = get_peeron_instruction_url(self.set_number, self.version_number)
|
||||
scraper = create_peeron_scraper()
|
||||
response = scraper.get(base_url)
|
||||
|
||||
if response.status_code != 200:
|
||||
return False
|
||||
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
# Check for "Browse instruction library" header (set not found)
|
||||
if soup.find('h1', string="Browse instruction library"):
|
||||
return False
|
||||
|
||||
# Look for thumbnail images to confirm instructions exist
|
||||
thumbnails = soup.select('table[cellspacing="5"] a img[src^="http://belay.peeron.com/thumbs/"]')
|
||||
return len(thumbnails) > 0
|
||||
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
# Find all available instruction pages on Peeron
|
||||
@@ -216,12 +328,14 @@ class PeeronInstructions(object):
|
||||
|
||||
logger.debug(f"[find_pages] fetching HTML from {base_url!r}")
|
||||
|
||||
# Set up cloudscraper with cookies enabled for Peeron
|
||||
# Set up session with persistent cookies for Peeron (like working dl_peeron.py)
|
||||
scraper = create_peeron_scraper()
|
||||
|
||||
# Download the main HTML page
|
||||
# Download the main HTML page to establish session and cookies
|
||||
try:
|
||||
logger.debug(f"[find_pages] Establishing session by visiting: {base_url}")
|
||||
response = scraper.get(base_url)
|
||||
logger.debug(f"[find_pages] Main page visit: HTTP {response.status_code}")
|
||||
if response.status_code != 200:
|
||||
raise ErrorException(f'Failed to load Peeron page for {self.set_number}-{self.version_number}. HTTP {response.status_code}')
|
||||
except requests.exceptions.RequestException as e:
|
||||
@@ -235,34 +349,56 @@ class PeeronInstructions(object):
|
||||
raise ErrorException(f'Set {self.set_number}-{self.version_number} not found on Peeron')
|
||||
|
||||
# Locate all thumbnail images in the expected table structure
|
||||
thumbnails = soup.select('table[cellspacing="5"] a img[src^="http://belay.peeron.com/thumbs/"]')
|
||||
# Use the configured thumbnail pattern to build the expected URL prefix
|
||||
thumb_base_url = get_peeron_thumbnail_url(self.set_number, self.version_number)
|
||||
thumbnails = soup.select(f'table[cellspacing="5"] a img[src^="{thumb_base_url}"]')
|
||||
|
||||
if not thumbnails:
|
||||
raise ErrorException(f'No instruction pages found for {self.set_number}-{self.version_number} on Peeron')
|
||||
|
||||
pages: list[PeeronPage] = []
|
||||
for img in thumbnails:
|
||||
total_thumbnails = len(thumbnails)
|
||||
|
||||
# Initialize progress if socket is available
|
||||
if self.socket:
|
||||
self.socket.progress_total = total_thumbnails
|
||||
self.socket.progress_count = 0
|
||||
self.socket.progress(message=f"Starting to cache {total_thumbnails} full images")
|
||||
|
||||
for idx, img in enumerate(thumbnails, 1):
|
||||
thumb_url = img['src']
|
||||
|
||||
# Extract the page number from the thumbnail URL
|
||||
page_number = thumb_url.split('/')[-2]
|
||||
|
||||
# Build the full-size image URL
|
||||
image_url = f"{scan_base_url}{page_number}/"
|
||||
# Build the full-size scan URL using the page number
|
||||
full_size_url = f"{scan_base_url}{page_number}/"
|
||||
|
||||
logger.debug(f"[find_pages] Page {page_number}: thumb={thumb_url}, image={image_url}")
|
||||
logger.debug(f"[find_pages] Page {page_number}: thumb={thumb_url}, full_size={full_size_url}")
|
||||
|
||||
# Create alt text for the page
|
||||
alt_text = f"LEGO Instructions {self.set_number}-{self.version_number} Page {page_number}"
|
||||
|
||||
# Cache the thumbnail
|
||||
cached_thumb_url = cache_thumbnail(thumb_url)
|
||||
# Report progress if socket is available
|
||||
if self.socket:
|
||||
self.socket.progress_count = idx
|
||||
self.socket.progress(message=f"Caching full image {idx} of {total_thumbnails}")
|
||||
|
||||
# Cache the full-size image and generate thumbnail preview using established session
|
||||
cached_full_path, cached_thumb_url = cache_full_image_and_generate_thumbnail(
|
||||
full_size_url, page_number, self.set_number, self.version_number, session=scraper
|
||||
)
|
||||
|
||||
# Skip this page if caching failed
|
||||
if not cached_full_path or not cached_thumb_url:
|
||||
logger.warning(f"[find_pages] Skipping page {page_number} due to caching failure")
|
||||
continue
|
||||
|
||||
page = PeeronPage(
|
||||
page_number=page_number,
|
||||
thumbnail_url=thumb_url,
|
||||
original_image_url=full_size_url,
|
||||
cached_full_image_path=cached_full_path,
|
||||
cached_thumbnail_url=cached_thumb_url,
|
||||
image_url=image_url,
|
||||
alt_text=alt_text
|
||||
)
|
||||
pages.append(page)
|
||||
|
||||
+48
-147
@@ -67,86 +67,58 @@ class PeeronPDF(object):
|
||||
total_pages = len(self.pages)
|
||||
self.socket.update_total(total_pages)
|
||||
self.socket.progress_count = 0
|
||||
self.socket.progress(message=f"Starting download of {total_pages} pages")
|
||||
self.socket.progress(message=f"Starting PDF creation from {total_pages} cached pages")
|
||||
|
||||
# Set up cloudscraper session for all downloads
|
||||
scraper = create_peeron_scraper()
|
||||
# Use cached images directly - no downloads needed!
|
||||
cached_files_with_rotation = []
|
||||
missing_pages = []
|
||||
|
||||
# First visit the main instruction page to establish session with Peeron
|
||||
for i, page in enumerate(self.pages):
|
||||
# Check if cached file exists
|
||||
if os.path.isfile(page.cached_full_image_path):
|
||||
cached_files_with_rotation.append((page.cached_full_image_path, page.rotation))
|
||||
|
||||
# Update progress
|
||||
self.socket.progress_count += 1
|
||||
self.socket.progress(
|
||||
message=f"Processing cached page {page.page_number} ({i + 1}/{total_pages})"
|
||||
)
|
||||
else:
|
||||
missing_pages.append(page.page_number)
|
||||
logger.warning(f"Cached image missing for page {page.page_number}: {page.cached_full_image_path}")
|
||||
|
||||
if not cached_files_with_rotation:
|
||||
raise DownloadException(f"No cached images available for set {self.set_number}-{self.version_number}. Cache may have been cleared.")
|
||||
|
||||
elif len(cached_files_with_rotation) < total_pages:
|
||||
# Partial success
|
||||
error_msg = f"Only found {len(cached_files_with_rotation)}/{total_pages} cached images."
|
||||
if missing_pages:
|
||||
error_msg += f" Missing pages: {', '.join(missing_pages)}."
|
||||
logger.warning(error_msg)
|
||||
|
||||
# Create PDF from cached images with rotation
|
||||
self._create_pdf_from_images(cached_files_with_rotation, target_path)
|
||||
|
||||
# Success
|
||||
logger.info(f"Created PDF {self.filename} with {len(cached_files_with_rotation)} pages")
|
||||
|
||||
# Create BrickInstructions instance to get PDF URL
|
||||
instructions = BrickInstructions(self.filename)
|
||||
pdf_url = instructions.url()
|
||||
|
||||
self.socket.complete(
|
||||
message=f'PDF {self.filename} created with {len(cached_files_with_rotation)} pages - <a href="{pdf_url}" target="_blank" class="btn btn-sm btn-primary ms-2"><i class="ri-external-link-line"></i> Open PDF</a>'
|
||||
)
|
||||
|
||||
# Clean up set cache after successful PDF creation
|
||||
try:
|
||||
main_page_url = get_peeron_instruction_url(self.set_number, self.version_number)
|
||||
logger.debug(f"Establishing session by visiting: {main_page_url}")
|
||||
main_response = scraper.get(main_page_url)
|
||||
logger.debug(f"Main page visit: HTTP {main_response.status_code}")
|
||||
from .peeron_instructions import clear_set_cache
|
||||
deleted_count = clear_set_cache(self.set_number, self.version_number)
|
||||
if deleted_count > 0:
|
||||
logger.info(f"[create_pdf] Cleaned up {deleted_count} cache files for set {self.set_number}-{self.version_number}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to visit main page: {e}")
|
||||
|
||||
# Download images to temporary files with rotation info
|
||||
temp_files_with_rotation = []
|
||||
failed_pages = []
|
||||
|
||||
try:
|
||||
for i, page in enumerate(self.pages):
|
||||
# Add delay between requests to avoid being blocked
|
||||
if i > 0:
|
||||
delay_ms = get_peeron_download_delay()
|
||||
time.sleep(delay_ms / 1000.0) # Convert milliseconds to seconds
|
||||
|
||||
temp_file = self._download_page_image(page, i + 1, scraper)
|
||||
if temp_file:
|
||||
temp_files_with_rotation.append((temp_file, page.rotation))
|
||||
else:
|
||||
failed_pages.append(page.page_number)
|
||||
|
||||
if not temp_files_with_rotation:
|
||||
# Collect detailed error information
|
||||
error_msg = f"Failed to download any instruction pages for set {self.set_number}-{self.version_number}."
|
||||
|
||||
# Check if it's a bot protection issue by trying to access the main page
|
||||
try:
|
||||
test_response = scraper.get(get_peeron_instruction_url(self.set_number, self.version_number))
|
||||
if test_response.status_code == 403:
|
||||
error_msg += " Peeron blocked the request (HTTP 403) - bot protection is active."
|
||||
elif test_response.status_code == 404:
|
||||
error_msg += " Set not found on Peeron (HTTP 404)."
|
||||
elif "Browse instruction library" in test_response.text:
|
||||
error_msg += " Set exists on Peeron but has no instruction scans available."
|
||||
else:
|
||||
min_size = get_min_image_size()
|
||||
error_msg += f" All pages returned small error images (smaller than {min_size}x{min_size}) - likely bot protection."
|
||||
except Exception:
|
||||
error_msg += " Could not connect to Peeron - check internet connection."
|
||||
|
||||
raise DownloadException(error_msg)
|
||||
|
||||
elif len(temp_files_with_rotation) < total_pages:
|
||||
# Partial success
|
||||
error_msg = f"Only downloaded {len(temp_files_with_rotation)}/{total_pages} pages successfully."
|
||||
if failed_pages:
|
||||
error_msg += f" Failed pages: {', '.join(failed_pages)}."
|
||||
logger.warning(error_msg)
|
||||
|
||||
# Create PDF from downloaded images with rotation
|
||||
self._create_pdf_from_images(temp_files_with_rotation, target_path)
|
||||
|
||||
# Success
|
||||
logger.info(f"Created PDF {self.filename} with {len(temp_files_with_rotation)} pages")
|
||||
|
||||
# Create BrickInstructions instance to get PDF URL
|
||||
instructions = BrickInstructions(self.filename)
|
||||
pdf_url = instructions.url()
|
||||
|
||||
self.socket.complete(
|
||||
message=f'PDF {self.filename} created with {len(temp_files_with_rotation)} pages - <a href="{pdf_url}" target="_blank" class="btn btn-sm btn-primary ms-2"><i class="ri-external-link-line"></i> Open PDF</a>'
|
||||
)
|
||||
|
||||
finally:
|
||||
# Cleanup temporary files
|
||||
for temp_file, _ in temp_files_with_rotation:
|
||||
try:
|
||||
os.remove(temp_file)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to remove temp file {temp_file}: {e}")
|
||||
logger.warning(f"[create_pdf] Failed to clean set cache: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating PDF {self.filename}: {e}")
|
||||
@@ -154,77 +126,6 @@ class PeeronPDF(object):
|
||||
message=f"Error creating PDF {self.filename}: {e}"
|
||||
)
|
||||
|
||||
# Download a single page image
|
||||
def _download_page_image(self, page: PeeronPage, page_num: int, scraper, /) -> str | None:
    """Download a single page image to a temporary file using the provided scraper session.

    Args:
        page: Page to fetch. ``page.image_url`` is requested and
            ``page.page_number`` is used in log messages and in the
            temporary file's prefix.
        page_num: Position of the page, shown in the progress message.
        scraper: Session-like object; ``scraper.get(url, stream=True)`` is
            called on it.

    Returns:
        Path of the temporary ``.jpg`` file, or ``None`` when the request
        fails, the response is not an image, the image is narrower or
        shorter than ``get_min_image_size()``, or any other error occurs.
        No temporary file is left behind when ``None`` is returned.
    """
    try:
        logger.debug(f"Attempting to download page {page.page_number} from: {page.image_url}")

        # Download the image using the shared scraper session
        response = scraper.get(page.image_url, stream=True)
        try:
            logger.debug(f"Page {page.page_number}: HTTP {response.status_code}, Content-Type: {response.headers.get('content-type', 'unknown')}")

            if not response.ok:
                logger.warning(f"Failed to download page {page.page_number}: HTTP {response.status_code}")
                return None

            # Check if response is actually an image (not an error page)
            content_type = response.headers.get('content-type', '')
            if not content_type.startswith('image/'):
                # Log first 500 chars of response for debugging
                try:
                    response_text = response.text[:500]
                    logger.warning(f"Page {page.page_number}: Response is not an image (content-type: {content_type}). Response preview: {response_text}")
                except Exception:
                    logger.warning(f"Page {page.page_number}: Response is not an image (content-type: {content_type})")
                return None

            # delete=False: the file must outlive this function on success.
            # The file object owns the descriptor, so there is no raw fd to
            # close by hand (and no risk of closing it twice).
            temp_file = tempfile.NamedTemporaryFile(
                mode='wb',
                suffix='.jpg',
                prefix=f'peeron_{page.page_number}_',
                delete=False,
            )
            temp_path = temp_file.name

            try:
                with temp_file:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            temp_file.write(chunk)

                # Validate that we actually got an image (not an HTML error page)
                try:
                    with Image.open(temp_path) as test_img:
                        width, height = test_img.size
                except Exception as img_error:
                    logger.warning(f"Page {page.page_number}: Invalid image file - {img_error}")
                    os.remove(temp_path)
                    return None

                # Size check runs after the image is closed so the file can
                # be removed on platforms that lock open files.
                min_size = get_min_image_size()
                if width < min_size or height < min_size:  # Too small to be a real instruction page
                    logger.warning(f"Page {page.page_number}: Image too small ({width}x{height}) - likely an error page")
                    os.remove(temp_path)
                    return None

                # Update progress
                self.socket.progress_count += 1
                self.socket.progress(
                    message=f"Downloaded page {page.page_number} ({page_num}/{len(self.pages)})"
                )

                return temp_path

            except Exception:
                # Do not leave a partial file behind; it may already be gone
                try:
                    os.remove(temp_path)
                except OSError:
                    pass
                raise
        finally:
            # stream=True keeps the connection checked out until the body is
            # consumed or the response is closed; the early returns above
            # never read the body.
            response.close()

    except Exception as e:
        logger.warning(f"Failed to download page {page.page_number}: {e}")
        return None
|
||||
|
||||
# Create PDF from downloaded images
|
||||
def _create_pdf_from_images(self, image_paths_and_rotations: list[tuple[str, int]], output_path: str, /) -> None:
|
||||
|
||||
+31
-3
@@ -6,7 +6,7 @@ from flask_socketio import SocketIO
|
||||
|
||||
from .instructions import BrickInstructions
|
||||
from .instructions_list import BrickInstructionsList
|
||||
from .peeron_instructions import PeeronPage
|
||||
from .peeron_instructions import PeeronInstructions, PeeronPage
|
||||
from .peeron_pdf import PeeronPDF
|
||||
from .set import BrickSet
|
||||
from .socket_decorator import authenticated_socket, rebrickable_socket
|
||||
@@ -23,6 +23,7 @@ MESSAGES: Final[dict[str, str]] = {
|
||||
'DOWNLOAD_PEERON_PAGES': 'download_peeron_pages',
|
||||
'FAIL': 'fail',
|
||||
'IMPORT_SET': 'import_set',
|
||||
'LOAD_PEERON_PAGES': 'load_peeron_pages',
|
||||
'LOAD_SET': 'load_set',
|
||||
'PROGRESS': 'progress',
|
||||
'SET_LOADED': 'set_loaded',
|
||||
@@ -109,6 +110,30 @@ class BrickSocket(object):
|
||||
|
||||
BrickInstructionsList(force=True)
|
||||
|
||||
@self.socket.on(MESSAGES['LOAD_PEERON_PAGES'], namespace=self.namespace)  # noqa: E501
@authenticated_socket(self)
def load_peeron_pages(data: dict[str, Any], /) -> None:
    """Handle LOAD_PEERON_PAGES: find and cache the Peeron pages of a set.

    Reads the set number from ``data['set']``, runs
    ``PeeronInstructions.find_pages()`` with this socket attached so that
    progress is reported through it, then signals completion with the number
    of pages found. A missing set number or any exception is reported to the
    client through ``self.fail``.

    The handler is guarded by ``authenticated_socket`` like
    ``download_peeron_pages``: it makes the server fetch remote content and
    write files on behalf of the caller.
    """
    logger.debug('Socket: LOAD_PEERON_PAGES={data} (from: {fr})'.format(
        data=data, fr=request.remote_addr))

    try:
        set_number = data.get('set', '')
        if not set_number:
            self.fail(message="Set number is required")
            return

        # Create Peeron instructions instance with socket for progress reporting
        peeron = PeeronInstructions(set_number, socket=self)

        # Find pages (this will report progress while caching)
        pages = peeron.find_pages()

        # Complete the operation (JavaScript will handle redirect)
        self.complete(message=f"Found {len(pages)} instruction pages on Peeron")

    except Exception as e:
        logger.error(f"Error in load_peeron_pages: {e}")
        self.fail(message=f"Error loading Peeron pages: {e}")
|
||||
|
||||
@self.socket.on(MESSAGES['DOWNLOAD_PEERON_PAGES'], namespace=self.namespace) # noqa: E501
|
||||
@authenticated_socket(self)
|
||||
def download_peeron_pages(data: dict[str, Any], /) -> None:
|
||||
@@ -142,8 +167,9 @@ class BrickSocket(object):
|
||||
for page_data in pages_data:
|
||||
page = PeeronPage(
|
||||
page_number=page_data.get('page_number', ''),
|
||||
thumbnail_url=page_data.get('thumbnail_url', ''),
|
||||
image_url=page_data.get('image_url', ''),
|
||||
original_image_url=page_data.get('original_image_url', ''),
|
||||
cached_full_image_path=page_data.get('cached_full_image_path', ''),
|
||||
cached_thumbnail_url='', # Not needed for PDF generation
|
||||
alt_text=page_data.get('alt_text', ''),
|
||||
rotation=page_data.get('rotation', 0)
|
||||
)
|
||||
@@ -153,6 +179,8 @@ class BrickSocket(object):
|
||||
pdf_generator = PeeronPDF(set_num, version_num, pages, socket=self)
|
||||
pdf_generator.create_pdf()
|
||||
|
||||
# Note: Cache cleanup is handled automatically by pdf_generator.create_pdf()
|
||||
|
||||
# Refresh instructions list to include new PDF
|
||||
BrickInstructionsList(force=True)
|
||||
|
||||
|
||||
@@ -25,6 +25,22 @@ instructions_page = Blueprint(
|
||||
)
|
||||
|
||||
|
||||
def _render_peeron_select_page(set: str) -> str:
    """Render the Peeron page-selection template for *set*.

    Looks up the set's pages through ``PeeronInstructions.find_pages()`` and
    passes them, together with the socket path, namespace and message names,
    to ``peeron_select.html``.
    """
    # This will use the cached thumbnails
    pages = PeeronInstructions(set).find_pages()
    current_app.logger.debug(f"[peeron_loaded] Found {len(pages)} pages for {set}")

    config = current_app.config
    context = {
        'download': True,
        'pages': pages,
        'set': set,
        'path': config['SOCKET_PATH'],
        'namespace': config['SOCKET_NAMESPACE'],
        'messages': MESSAGES,
    }
    return render_template('peeron_select.html', **context)
|
||||
|
||||
|
||||
# Index
|
||||
@instructions_page.route('/', methods=['GET'])
|
||||
@exception_handler(__file__)
|
||||
@@ -142,6 +158,10 @@ def download() -> str:
|
||||
except Exception:
|
||||
set = ''
|
||||
|
||||
# Check if this is a redirect after Peeron pages were loaded
|
||||
if request.args.get('peeron_loaded'):
|
||||
return _render_peeron_select_page(set)
|
||||
|
||||
return render_template(
|
||||
'instructions.html',
|
||||
download=True,
|
||||
@@ -161,11 +181,14 @@ def do_download() -> str:
|
||||
except Exception:
|
||||
set = ''
|
||||
|
||||
# Try Rebrickable first, fallback to Peeron if it fails
|
||||
rebrickable_instructions, peeron_pages = PeeronInstructions.find_instructions_with_peeron_fallback(set)
|
||||
# Check if this is a redirect after Peeron pages were loaded
|
||||
if request.args.get('peeron_loaded'):
|
||||
return _render_peeron_select_page(set)
|
||||
|
||||
# Determine which template to render based on what we found
|
||||
if rebrickable_instructions:
|
||||
# Try Rebrickable first
|
||||
try:
|
||||
from .instructions import BrickInstructions
|
||||
rebrickable_instructions = BrickInstructions.find_instructions(set)
|
||||
# Standard Rebrickable instructions found
|
||||
return render_template(
|
||||
'instructions.html',
|
||||
@@ -176,26 +199,32 @@ def do_download() -> str:
|
||||
namespace=current_app.config['SOCKET_NAMESPACE'],
|
||||
messages=MESSAGES
|
||||
)
|
||||
elif peeron_pages:
|
||||
# Peeron pages found - show page selection interface
|
||||
return render_template(
|
||||
'peeron_select.html',
|
||||
download=True,
|
||||
pages=peeron_pages,
|
||||
set=set,
|
||||
path=current_app.config['SOCKET_PATH'],
|
||||
namespace=current_app.config['SOCKET_NAMESPACE'],
|
||||
messages=MESSAGES
|
||||
)
|
||||
else:
|
||||
# This shouldn't happen as the fallback method re-raises the original error
|
||||
return render_template(
|
||||
'instructions.html',
|
||||
download=True,
|
||||
instructions=[],
|
||||
set=set,
|
||||
error='No instructions found on Rebrickable or Peeron',
|
||||
path=current_app.config['SOCKET_PATH'],
|
||||
namespace=current_app.config['SOCKET_NAMESPACE'],
|
||||
messages=MESSAGES
|
||||
)
|
||||
except Exception:
|
||||
# Rebrickable failed, check if Peeron has instructions (without caching thumbnails yet)
|
||||
try:
|
||||
peeron = PeeronInstructions(set)
|
||||
# Just check if pages exist, don't cache thumbnails yet
|
||||
if peeron.exists():
|
||||
# Peeron has instructions - show loading interface
|
||||
return render_template(
|
||||
'peeron_select.html',
|
||||
download=True,
|
||||
loading_peeron=True, # Flag to show loading state
|
||||
set=set,
|
||||
path=current_app.config['SOCKET_PATH'],
|
||||
namespace=current_app.config['SOCKET_NAMESPACE'],
|
||||
messages=MESSAGES
|
||||
)
|
||||
else:
|
||||
raise Exception("Not found on Peeron either")
|
||||
except Exception:
|
||||
return render_template(
|
||||
'instructions.html',
|
||||
download=True,
|
||||
instructions=[],
|
||||
set=set,
|
||||
error='No instructions found on Rebrickable or Peeron',
|
||||
path=current_app.config['SOCKET_PATH'],
|
||||
namespace=current_app.config['SOCKET_NAMESPACE'],
|
||||
messages=MESSAGES
|
||||
)
|
||||
|
||||
@@ -132,8 +132,8 @@ class BrickPeeronSocket extends BrickSocket {
|
||||
|
||||
const pages = selectedFiles.map(checkbox => ({
|
||||
page_number: checkbox.getAttribute('data-page-number'),
|
||||
thumbnail_url: checkbox.getAttribute('data-thumbnail-url'),
|
||||
image_url: checkbox.getAttribute('data-image-url'),
|
||||
original_image_url: checkbox.getAttribute('data-original-image-url'),
|
||||
cached_full_image_path: checkbox.getAttribute('data-cached-full-image-path'),
|
||||
alt_text: checkbox.getAttribute('data-alt-text'),
|
||||
rotation: parseInt(checkbox.getAttribute('data-rotation') || '0')
|
||||
}));
|
||||
@@ -168,3 +168,39 @@ class BrickPeeronSocket extends BrickSocket {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Simple Peeron page loader using standard socket pattern.
//
// Emits a LOAD_PEERON_PAGES request for one set as soon as the socket is
// connected, and reloads the current page with `peeron_loaded=1` once the
// base class reports completion.
class BrickPeeronPageLoader extends BrickSocket {
    // set: identifier sent with the load request and echoed in the redirect URL.
    // path, namespace, messages: forwarded unchanged to BrickSocket.
    constructor(set, path, namespace, messages) {
        // Use 'peeron-loader' as the ID for socket elements
        super('peeron-loader', path, namespace, messages, false);

        this.set = set;
        this.setup();

        // Auto-start loading when connected.
        // NOTE(review): the 100 ms delay is kept as-is; presumably it lets
        // setup() finish creating the socket - confirm against BrickSocket.
        setTimeout(() => this.startWhenConnected(), 100);
    }

    // Request the pages now if already connected, otherwise on the next
    // 'connect' event.
    startWhenConnected() {
        // Without a socket there is nothing to wait on; bail out instead of
        // dereferencing an undefined value.
        if (!this.socket) {
            console.error('BrickPeeronPageLoader: no socket available, cannot load Peeron pages');
            return;
        }

        if (this.socket.connected) {
            this.loadPages();
        } else {
            // `once` rather than `on`: a later reconnect must not re-send the
            // load request, matching the already-connected path above.
            this.socket.once('connect', () => this.loadPages());
        }
    }

    // Ask the server to load the Peeron pages for this set.
    loadPages() {
        this.socket.emit(this.messages.LOAD_PEERON_PAGES, {
            set: this.set
        });
    }

    // Override complete to redirect when done
    complete(data) {
        super.complete(data);

        // Redirect to show the pages selection interface
        const params = new URLSearchParams();
        params.set('set', this.set);
        params.set('peeron_loaded', '1');
        window.location.href = `${window.location.pathname}?${params.toString()}`;
    }
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{# Instantiates the Peeron page loader once the DOM is ready.
   `set` is serialized with `tojson` so the script receives the exact value as
   a valid JavaScript string literal; HTML autoescaping inside <script> would
   turn characters such as & or ' into entities and leaves backslashes
   unescaped. `string` keeps the previous "render as text" conversion. #}
<script type="text/javascript">
  document.addEventListener("DOMContentLoaded", () => {
    new BrickPeeronPageLoader(
      {{ set|string|tojson }},
      '{{ path }}',
      '{{ namespace }}',
      {
        COMPLETE: '{{ messages['COMPLETE'] }}',
        FAIL: '{{ messages['FAIL'] }}',
        LOAD_PEERON_PAGES: '{{ messages['LOAD_PEERON_PAGES'] }}',
        PROGRESS: '{{ messages['PROGRESS'] }}',
      }
    );
  });
</script>
|
||||
@@ -23,9 +23,45 @@
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
{% if pages %}
|
||||
{% if loading_peeron %}
|
||||
<div class="alert alert-info" role="alert">
|
||||
<i class="ri-information-line"></i> <strong>Found on Peeron:</strong> {{ set }} was not available on Rebrickable, loading instruction pages from Peeron...
|
||||
</div>
|
||||
|
||||
<!-- Socket elements for peeron-loader -->
|
||||
<div id="peeron-loader-fail" class="alert alert-danger d-none" role="alert"></div>
|
||||
<div id="peeron-loader-complete" class="alert alert-success d-none" role="alert"></div>
|
||||
<div class="mb-3">
|
||||
<p>
|
||||
Progress <span id="peeron-loader-count"></span>
|
||||
<span id="peeron-loader-spinner" class="d-none">
|
||||
<span class="spinner-border spinner-border-sm" aria-hidden="true"></span>
|
||||
<span class="visually-hidden" role="status">Loading...</span>
|
||||
</span>
|
||||
</p>
|
||||
<div id="peeron-loader-progress" class="progress" role="progressbar" aria-label="Loading Peeron pages" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100">
|
||||
<div id="peeron-loader-progress-bar" class="progress-bar" style="width: 0%"></div>
|
||||
</div>
|
||||
<p id="peeron-loader-progress-message" class="text-center d-none"></p>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if loading_peeron %}
|
||||
<!-- Include socket for automatic loading -->
|
||||
{% with set=set, path=path, namespace=namespace, messages=messages %}
|
||||
{% include 'instructions/peeron_loader_socket.html' %}
|
||||
{% endwith %}
|
||||
{% endif %}
|
||||
|
||||
{% if pages %}
|
||||
<div id="peeron-loading-alert" class="alert alert-info" role="alert">
|
||||
<i class="ri-information-line"></i> <strong>Instructions found on Peeron:</strong> {{ set }} was not available on Rebrickable, but {{ pages|length }} instruction pages were found on Peeron.
|
||||
<div id="peeron-cache-progress" class="mt-2 d-none">
|
||||
<div class="progress" role="progressbar" aria-label="Caching thumbnails" aria-valuenow="0" aria-valuemin="0" aria-valuemax="100">
|
||||
<div id="peeron-cache-progress-bar" class="progress-bar" style="width: 0%"></div>
|
||||
</div>
|
||||
<small id="peeron-cache-message" class="text-muted">Caching thumbnails...</small>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card mb-3">
|
||||
<div class="card-header">
|
||||
@@ -49,16 +85,17 @@
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" id="peeron-page-{{ loop.index }}"
|
||||
data-page-number="{{ page.page_number }}"
|
||||
data-thumbnail-url="{{ page.thumbnail_url }}"
|
||||
data-image-url="{{ page.image_url }}"
|
||||
data-original-image-url="{{ page.original_image_url }}"
|
||||
data-cached-full-image-path="{{ page.cached_full_image_path }}"
|
||||
data-alt-text="{{ page.alt_text }}"
|
||||
data-rotation="0"
|
||||
autocomplete="off">
|
||||
<label class="form-check-label w-100" for="peeron-page-{{ loop.index }}">
|
||||
<div class="text-center position-relative">
|
||||
<div class="position-relative d-inline-block">
|
||||
<img id="peeron-img-{{ loop.index }}" src="{{ page.cached_thumbnail_url or page.thumbnail_url }}" alt="{{ page.alt_text }}"
|
||||
class="img-fluid mb-2 border rounded" style="max-height: 150px; transform: rotate(0deg); transition: transform 0.3s ease;">
|
||||
<img id="peeron-img-{{ loop.index }}" src="{{ page.cached_thumbnail_url }}" alt="{{ page.alt_text }}"
|
||||
class="img-fluid mb-2 border rounded peeron-thumbnail" style="max-height: 150px; transform: rotate(0deg); transition: transform 0.3s ease;"
|
||||
data-index="{{ loop.index }}" data-total="{{ pages|length }}">
|
||||
<button type="button" class="btn btn-sm btn-light position-absolute top-0 end-0 p-1 me-1 mt-1 peeron-rotate-btn"
|
||||
data-target="peeron-img-{{ loop.index }}" data-checkbox="peeron-page-{{ loop.index }}" data-rotation="0"
|
||||
title="Rotate page" style="font-size: 0.7rem; line-height: 1;">
|
||||
@@ -94,7 +131,11 @@
|
||||
<span id="peeron-download-status-icon" class="me-1"></span><span id="peeron-download-status" class="me-1"></span><button id="peeron-download" type="button" class="btn btn-primary"><i class="ri-download-line"></i> Download selected files</button>
|
||||
</div>
|
||||
</div>
|
||||
{% include 'instructions/peeron_socket.html' %}
|
||||
{% if not loading_peeron %}
|
||||
<!-- Include normal socket for downloading -->
|
||||
{% include 'instructions/peeron_socket.html' %}
|
||||
{% endif %}
|
||||
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user