From 2232f289f09af34076d116794a25934bccf692ff Mon Sep 17 00:00:00 2001 From: FrederikBaerentsen Date: Mon, 20 Jan 2025 22:41:22 +0100 Subject: [PATCH] Initial upload of working copy --- instDownloader.py | 116 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 instDownloader.py diff --git a/instDownloader.py b/instDownloader.py new file mode 100644 index 0000000..044d265 --- /dev/null +++ b/instDownloader.py @@ -0,0 +1,116 @@ +import requests +from bs4 import BeautifulSoup + +def get_instruction_links(set_id): + # Construct the URL + url = f"https://rebrickable.com/instructions/{set_id}" + + # Load the page + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' +} + response = requests.get(url, headers=headers) + if response.status_code != 200: + print(f"Failed to load page. Status code: {response.status_code}") + return [] + + # Parse the HTML content + soup = BeautifulSoup(response.content, 'html.parser') + + # Collect all tags with "LEGO Building Instructions" in the alt attribute + found_tags = [] + links = [] + for a_tag in soup.find_all('a', href=True): + img_tag = a_tag.find('img', alt=True) + if img_tag and "LEGO Building Instructions" in img_tag['alt']: + found_tags.append((img_tag['alt'], a_tag['href'])) # Save alt and href + + # Filter links containing "V29" + v29_links = [] + for alt_text, href in found_tags: + if "V29" in alt_text or "V 29" in alt_text: + # Check for x/y format or assign sequential numbers if missing + if '/' in alt_text: + parts = alt_text.split('/')[0] + try: + numbers = [int(num) for num in parts.replace("-", " ").split() if num.isdigit()] + if len(numbers) == 2: + x, y = numbers + if x <= 10 and y <= 10: # Only consider valid x/y pairs + v29_links.append((x, y, href)) + except ValueError: + print(f"Failed to parse x/y in alt text: {alt_text}") + else: + # Assign sequential numbers if no x/y format is found + v29_links.append((len(v29_links) + 1, None, href)) + + # Sort the V29 links by x value (if available) + v29_links.sort(key=lambda link: link[0]) + + # If no V29 links found, return all found tags + if not v29_links: + print("No instructions with 'V29' found. Found the following tags:") + for alt_text, href in found_tags: + print(f"ALT: {alt_text}, HREF: {href}") + return found_tags # Return all links for downloading + + return v29_links + + +def normalize_alt(alt_text): + # Normalize the alt text to a file-friendly name + normalized = alt_text.replace("/", "-").replace(",", "").replace(" ", "-").replace(".", "") + return normalized + + +def download_instructions(set_id, links, is_v29=True): + for index, link in enumerate(links): + if is_v29: + # V29 links have three elements: (x, y, href) + href = link[2] # Extract href from the tuple + if len(links) == 1: + # Single link, use the set ID only + file_name = f"{set_id}.pdf" + else: + # Multiple links, use the x value + file_name = f"{set_id}+{index + 1}.pdf" + else: + # Non-V29 links have two elements: (alt_text, href) + alt_text, href = link + normalized_name = normalize_alt(alt_text) + file_name = f"{set_id}-{normalized_name}.pdf" + + download_url = f"https://rebrickable.com{href}" # Full URL + print(f"Downloading: {download_url} -> {file_name}") + + # Fetch the file + response = requests.get(download_url, stream=True) + if response.status_code == 200: + with open(file_name, 'wb') as file: + for chunk in response.iter_content(chunk_size=1024): + file.write(chunk) + print(f"Saved: {file_name}") + else: + print(f"Failed to download {download_url}. Status code: {response.status_code}") + +# Example usage +if __name__ == "__main__": + import argparse + + # Set up argument parser + parser = argparse.ArgumentParser(description="Download LEGO building instructions for a given set ID.") + parser.add_argument("set_id", type=str, help="The LEGO set ID (e.g., 10313-1)") + args = parser.parse_args() + + set_id = args.set_id + instruction_links = get_instruction_links(set_id) + + if instruction_links: + if isinstance(instruction_links[0], tuple) and len(instruction_links[0]) == 3: # V29 links + print(f"Found {len(instruction_links)} V29 instruction links.") + download_instructions(set_id, instruction_links) + else: # Non-V29 links + print(f"Found {len(instruction_links)} non-V29 instruction links.") + download_instructions(set_id, instruction_links, is_v29=False) + else: + print("No instruction links found.")