Initial upload of working copy

2025-01-20 22:41:22 +01:00 · 2025-01-20 22:41:22 +01:00 · 2232f289f0
commit 2232f289f0
parent e2324d9280
1 changed files with 116 additions and 0 deletions
--- a/instDownloader.py
+++ b/instDownloader.py
@ -0,0 +1,116 @@
 import requests
 from bs4 import BeautifulSoup
 def get_instruction_links(set_id, timeout=30):
    """Scrape the Rebrickable instructions page of a set for download links.

    Fetches ``https://rebrickable.com/instructions/<set_id>`` and collects
    every ``<a href>`` that wraps an ``<img>`` whose ``alt`` text contains
    "LEGO Building Instructions".

    Args:
        set_id: Set identifier inserted into the page URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``(x, y, href)`` tuples sorted by ``x`` when at least one
        "V29" entry was accepted (``y`` is ``None`` for entries numbered
        sequentially). Otherwise the list of all matching
        ``(alt_text, href)`` tuples. An empty list when the page could not
        be loaded or nothing matched.
    """
    # Construct the URL
    url = f"https://rebrickable.com/instructions/{set_id}"
    # Load the page
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Treat connection problems like a bad status code: report, return nothing.
        print(f"Failed to load page: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to load page. Status code: {response.status_code}")
        return []
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')
    # Collect (alt, href) for every link wrapping an instructions image
    found_tags = []
    for a_tag in soup.find_all('a', href=True):
        img_tag = a_tag.find('img', alt=True)
        if img_tag and "LEGO Building Instructions" in img_tag['alt']:
            found_tags.append((img_tag['alt'], a_tag['href']))
    # Filter links containing "V29"
    v29_links = []
    for alt_text, href in found_tags:
        if "V29" not in alt_text and "V 29" not in alt_text:
            continue
        if '/' not in alt_text:
            # No x/y marker: number the entry by its position among accepted links
            v29_links.append((len(v29_links) + 1, None, href))
            continue
        # NOTE(review): only the text before the first '/' is scanned, and an
        # entry is kept only if it holds exactly two standalone integers that
        # are both <= 10. Anything else is dropped silently -- confirm this
        # matches the alt-text format actually served by the site.
        parts = alt_text.split('/')[0]
        try:
            numbers = [int(num) for num in parts.replace("-", " ").split() if num.isdigit()]
        except ValueError:
            # str.isdigit() accepts some characters int() rejects (e.g. superscripts)
            print(f"Failed to parse x/y in alt text: {alt_text}")
            continue
        if len(numbers) == 2:
            x, y = numbers
            if x <= 10 and y <= 10:  # Only consider valid x/y pairs
                v29_links.append((x, y, href))
    # Sort the V29 links by x value
    v29_links.sort(key=lambda link: link[0])
    # If no V29 links found, return all found tags
    if not v29_links:
        print("No instructions with 'V29' found. Found the following tags:")
        for alt_text, href in found_tags:
            print(f"ALT: {alt_text}, HREF: {href}")
        return found_tags  # Return all links for downloading
    return v29_links
 def normalize_alt(alt_text):
    """Turn an image alt text into a name usable inside a file name.

    Slashes and spaces become hyphens; commas and periods are removed.
    """
    # One translation table covers all four single-character substitutions.
    table = str.maketrans({"/": "-", " ": "-", ",": None, ".": None})
    return alt_text.translate(table)
 def download_instructions(set_id, links, is_v29=True, timeout=60):
    """Download every instruction link to a PDF file in the current directory.

    Args:
        set_id: Set identifier used as the file-name prefix.
        links: ``(x, y, href)`` tuples when ``is_v29`` is true, otherwise
            ``(alt_text, href)`` tuples.
        is_v29: Selects the tuple layout of ``links`` and the naming scheme.
        timeout: Seconds to wait on the server for each download.

    A failed download is reported on stdout and the remaining links are still
    attempted. Nothing is returned.
    """
    from urllib.parse import urljoin

    for index, link in enumerate(links):
        if is_v29:
            # V29 links have three elements: (x, y, href)
            href = link[2]
            if len(links) == 1:
                # Single link, use the set ID only
                file_name = f"{set_id}.pdf"
            else:
                # Multiple links, number files by position in the (sorted) list
                file_name = f"{set_id}+{index + 1}.pdf"
        else:
            # Non-V29 links have two elements: (alt_text, href)
            alt_text, href = link
            normalized_name = normalize_alt(alt_text)
            file_name = f"{set_id}-{normalized_name}.pdf"
        # urljoin keeps site-relative hrefs working and leaves absolute URLs intact
        download_url = urljoin("https://rebrickable.com", href)
        print(f"Downloading: {download_url} -> {file_name}")
        try:
            # The context manager closes the streamed response even when the
            # body is never read (non-200), releasing the connection.
            with requests.get(download_url, stream=True, timeout=timeout) as response:
                if response.status_code != 200:
                    print(f"Failed to download {download_url}. Status code: {response.status_code}")
                    continue
                with open(file_name, 'wb') as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        file.write(chunk)
        except requests.RequestException as exc:
            # One broken link must not abort the remaining downloads.
            print(f"Failed to download {download_url}: {exc}")
            continue
        print(f"Saved: {file_name}")
 # Example usage
 if __name__ == "__main__":
    import argparse

    # Command line: a single positional argument, the set ID
    cli = argparse.ArgumentParser(description="Download LEGO building instructions for a given set ID.")
    cli.add_argument("set_id", type=str, help="The LEGO set ID (e.g., 10313-1)")
    set_id = cli.parse_args().set_id

    instruction_links = get_instruction_links(set_id)
    if not instruction_links:
        print("No instruction links found.")
    else:
        # Three-element tuples are V29 links; anything else is the (alt, href) fallback
        first_link = instruction_links[0]
        is_v29 = isinstance(first_link, tuple) and len(first_link) == 3
        if is_v29:
            print(f"Found {len(instruction_links)} V29 instruction links.")
        else:
            print(f"Found {len(instruction_links)} non-V29 instruction links.")
        download_instructions(set_id, instruction_links, is_v29=is_v29)