"""Download LEGO building-instruction PDFs for a set from rebrickable.com."""

import argparse
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://rebrickable.com"

# Sent with every request (page fetch and file downloads alike).
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Seconds to wait for the server to connect / send data before giving up.
# Without a timeout, requests can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

DOWNLOAD_CHUNK_SIZE = 8192

# Single-pass equivalent of replacing "/" and " " with "-" and removing
# "," and "." from the alt text.
_ALT_TRANSLATION = str.maketrans({"/": "-", " ": "-", ",": None, ".": None})


def _parse_xy(alt_text):
    """Return the ``(x, y)`` pair found before the first ``/`` in *alt_text*.

    The text before the first slash is split on whitespace and hyphens and
    every all-digit token is converted to ``int``. A pair is returned only
    when exactly two numbers are found and both are ``<= 10``; otherwise
    ``None`` is returned.
    """
    prefix = alt_text.split('/')[0]
    try:
        numbers = [
            int(token)
            for token in prefix.replace("-", " ").split()
            if token.isdigit()
        ]
    except ValueError:
        # str.isdigit() accepts some characters (e.g. superscript digits)
        # that int() rejects.
        print(f"Failed to parse x/y in alt text: {alt_text}")
        return None

    if len(numbers) != 2:
        return None
    x, y = numbers
    if x <= 10 and y <= 10:  # Only consider valid x/y pairs
        return x, y
    return None


def get_instruction_links(set_id):
    """Collect instruction links for *set_id* from its instructions page.

    Every ``<a href>`` that wraps an ``<img>`` whose ``alt`` contains
    "LEGO Building Instructions" is considered.

    Returns:
        * a list of ``(x, y, href)`` tuples, sorted by ``x``, for the links
          whose alt text mentions "V29" / "V 29"; ``y`` is ``None`` when the
          alt text has no ``/`` and a sequential number was assigned;
        * otherwise, a list of ``(alt_text, href)`` tuples for every matching
          link (after printing them);
        * ``[]`` when the page could not be loaded.
    """
    url = f"{BASE_URL}/instructions/{set_id}"

    try:
        response = requests.get(
            url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        # Network-level failure (DNS, refused connection, timeout, ...).
        print(f"Failed to load page: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to load page. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # (alt, href) for every link wrapping an instructions image.
    found_tags = []
    for a_tag in soup.find_all('a', href=True):
        img_tag = a_tag.find('img', alt=True)
        if img_tag and "LEGO Building Instructions" in img_tag['alt']:
            found_tags.append((img_tag['alt'], a_tag['href']))

    v29_links = []
    for alt_text, href in found_tags:
        if "V29" not in alt_text and "V 29" not in alt_text:
            continue
        if '/' in alt_text:
            # NOTE(review): entries whose alt text contains '/' but does not
            # yield a valid x/y pair are dropped here, as before. Confirm
            # against real page data that this is intended.
            pair = _parse_xy(alt_text)
            if pair is not None:
                v29_links.append((pair[0], pair[1], href))
        else:
            # No x/y format: assign the next sequential number.
            v29_links.append((len(v29_links) + 1, None, href))

    v29_links.sort(key=lambda link: link[0])

    if not v29_links:
        print("No instructions with 'V29' found. Found the following tags:")
        for alt_text, href in found_tags:
            print(f"ALT: {alt_text}, HREF: {href}")
        return found_tags

    return v29_links


def normalize_alt(alt_text):
    """Turn *alt_text* into a file-friendly name.

    Slashes and spaces become hyphens; commas and periods are removed.
    """
    return alt_text.translate(_ALT_TRANSLATION)


def download_instructions(set_id, links, is_v29=True):
    """Download every link in *links* into the current working directory.

    Args:
        set_id: Set identifier used as the file-name prefix.
        links: ``(x, y, href)`` tuples when *is_v29* is true, otherwise
            ``(alt_text, href)`` tuples.
        is_v29: Selects the tuple layout and the file-naming scheme.

    A failed download is reported on stdout and does not stop the remaining
    downloads.
    """
    for index, link in enumerate(links):
        if is_v29:
            href = link[2]
            if len(links) == 1:
                # Single link, use the set ID only.
                file_name = f"{set_id}.pdf"
            else:
                # Multiple links: number them by position in the (already
                # sorted) list, not by the parsed x value.
                file_name = f"{set_id}+{index + 1}.pdf"
        else:
            alt_text, href = link
            file_name = f"{set_id}-{normalize_alt(alt_text)}.pdf"

        # urljoin copes with site-relative and absolute hrefs alike.
        download_url = urljoin(BASE_URL, href)
        print(f"Downloading: {download_url} -> {file_name}")

        try:
            # The context manager releases the streamed connection even if
            # writing the file fails part-way.
            with requests.get(
                download_url,
                headers=REQUEST_HEADERS,
                stream=True,
                timeout=REQUEST_TIMEOUT,
            ) as response:
                if response.status_code != 200:
                    print(f"Failed to download {download_url}. Status code: {response.status_code}")
                    continue
                with open(file_name, 'wb') as file:
                    for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                        file.write(chunk)
        except requests.RequestException as exc:
            print(f"Failed to download {download_url}: {exc}")
            continue

        print(f"Saved: {file_name}")


def main():
    """Parse the set ID from the command line and download its instructions."""
    parser = argparse.ArgumentParser(description="Download LEGO building instructions for a given set ID.")
    parser.add_argument("set_id", type=str, help="The LEGO set ID (e.g., 10313-1)")
    args = parser.parse_args()

    set_id = args.set_id
    instruction_links = get_instruction_links(set_id)

    if not instruction_links:
        print("No instruction links found.")
        return

    # V29 results are 3-tuples; the fallback results are 2-tuples.
    first = instruction_links[0]
    if isinstance(first, tuple) and len(first) == 3:
        print(f"Found {len(instruction_links)} V29 instruction links.")
        download_instructions(set_id, instruction_links)
    else:
        print(f"Found {len(instruction_links)} non-V29 instruction links.")
        download_instructions(set_id, instruction_links, is_v29=False)


# Example usage
if __name__ == "__main__":
    main()