From c8651027eb4d401481349a90f3b2737711d4249f Mon Sep 17 00:00:00 2001
From: FrederikBaerentsen <frederik+gitea@baerentsen.net>
Date: Sun, 26 Jan 2025 22:13:46 +0100
Subject: [PATCH] Added peeron downloader

---
 dl_peeron.py | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 dl_peeron.py

diff --git a/dl_peeron.py b/dl_peeron.py
new file mode 100644
index 0000000..0583859
--- /dev/null
+++ b/dl_peeron.py
@@ -0,0 +1,110 @@
+import os
+import sys
+import requests
+from bs4 import BeautifulSoup
+from PIL import Image
+from fpdf import FPDF
+
+def download_peeron_images(set_number, version_number, timeout=30):
+    """Download the Peeron instruction scans of a set and bundle them into a PDF.
+
+    Scans go to a temporary ``<set_number>-<version_number>_images`` folder and
+    are combined into ``<set_number>-<version_number>_peeron.pdf`` (paths relative
+    to the working directory). Problems are printed and end the run early.
+
+    Args:
+        set_number: Set number as used in Peeron URLs.
+        version_number: Version suffix that follows the dash in Peeron URLs.
+        timeout: Seconds to wait for the server on each HTTP request before giving up.
+    """
+    px_to_mm = 0.264583  # Convert pixels to mm (1 px = 0.264583 mm)
+    base_url = f"http://peeron.com/scans/{set_number}-{version_number}"
+    scan_base_url = f"http://belay.peeron.com/scans/{set_number}-{version_number}/"
+    output_folder = f"{set_number}-{version_number}_images"
+    pdf_filename = f"{set_number}-{version_number}_peeron.pdf"
+    image_files = []  # Paths of successfully downloaded images, in page order
+
+    # Check if PDF already exists to avoid re-downloading and processing
+    if os.path.exists(pdf_filename):
+        print(f"PDF already exists: {pdf_filename}")
+        return
+
+    # The with-block closes the session's connections on every exit path
+    with requests.Session() as session:
+        session.headers.update({
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
+        })
+
+        # Download the main HTML page containing the thumbnails
+        response = session.get(base_url, timeout=timeout)
+        if response.status_code != 200:
+            print(f"Failed to download page: {base_url} (status code {response.status_code})")
+            return
+
+        # This heading marks the page served when the set does not exist on Peeron
+        soup = BeautifulSoup(response.text, 'html.parser')
+        if soup.find('h1', string="Browse instruction library"):
+            print(f"Set number {set_number} not found on Peeron.")
+            return
+
+        # Locate all thumbnail images in the expected table structure
+        thumbnails = soup.select('table[cellspacing="5"] a img[src^="http://belay.peeron.com/thumbs/"]')
+        if not thumbnails:
+            print("No thumbnails found.")
+            return
+
+        # Created only now so the early exits above leave no empty folder behind
+        os.makedirs(output_folder, exist_ok=True)
+
+        for img in thumbnails:
+            # The second-to-last path segment of the thumbnail URL is the page number
+            page_number = img['src'].split('/')[-2]
+            image_url = f"{scan_base_url}{page_number}/"
+            # A streamed response holds its connection until closed, hence the with-block
+            with session.get(image_url, stream=True, timeout=timeout) as img_response:
+                if img_response.status_code != 200:
+                    print(f"Failed to download image: {image_url} (status code {img_response.status_code})")
+                    continue
+                img_path = os.path.join(output_folder, f"{page_number}.jpg")
+                # Save the image locally in chunks to handle large files efficiently
+                with open(img_path, 'wb') as f:
+                    for chunk in img_response.iter_content(1024):
+                        f.write(chunk)
+            image_files.append(img_path)
+            print(f"Downloaded: {image_url}")
+
+    # If no images were downloaded, exit to avoid creating an empty PDF
+    if not image_files:
+        print("No images downloaded.")
+        return
+
+    pdf = FPDF()
+    for img_file in image_files:
+        # Only the size is needed; an open handle can make os.remove() below fail on Windows
+        with Image.open(img_file) as image:
+            width, height = image.size
+        page_w, page_h = width * px_to_mm, height * px_to_mm
+        pdf.add_page(format=(page_w, page_h))
+        pdf.image(img_file, x=0, y=0, w=page_w, h=page_h)
+    pdf.output(pdf_filename)
+    print(f"PDF created: {pdf_filename}")
+
+    # Cleanup: remove downloaded images and the temporary folder to save space
+    for img_file in image_files:
+        os.remove(img_file)
+    os.rmdir(output_folder)
+    print("Cleaned up temporary files.")
+
+if __name__ == "__main__":
+    # The single argument must look like "<set_number>-<version_number>", e.g. "1234-1"
+    set_version_number = sys.argv[1].split('-') if len(sys.argv) == 2 else []
+
+    # Validate before indexing: a missing dash previously crashed with IndexError,
+    # and an empty half cannot name a set or a version
+    if len(set_version_number) < 2 or not all(set_version_number[:2]):
+        print("Usage: python script.py <set_number>-<version_number>")
+        sys.exit(1)
+
+    set_number, version_number = set_version_number[:2]
+    download_peeron_images(set_number, version_number)
+