117 lines
4.6 KiB
Python
117 lines
4.6 KiB
Python
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
def get_instruction_links(set_id):
    """Scrape the Rebrickable instructions page of a set and collect its links.

    Fetches ``https://rebrickable.com/instructions/{set_id}`` and inspects every
    ``<a href=...>`` element that wraps an ``<img>`` whose ``alt`` text contains
    "LEGO Building Instructions".

    Args:
        set_id: Set identifier interpolated into the page URL.

    Returns:
        A list in one of these shapes:

        * ``[(x, y, href), ...]`` sorted by ``x`` when at least one "V29" /
          "V 29" entry was kept. ``y`` is ``None`` for entries whose alt text
          has no "/" (those are numbered sequentially).
        * ``[(alt_text, href), ...]`` with every matching tag when no V29
          entry was kept (the tags are also printed).
        * ``[]`` when the page could not be loaded or nothing matched.
    """
    url = f"https://rebrickable.com/instructions/{set_id}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        # Report network failures the same way as a bad status: print, return [].
        print(f"Failed to load page: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to load page. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # Collect (alt, href) for every anchor wrapping an instructions image.
    found_tags = []
    for a_tag in soup.find_all('a', href=True):
        img_tag = a_tag.find('img', alt=True)
        if img_tag and "LEGO Building Instructions" in img_tag['alt']:
            found_tags.append((img_tag['alt'], a_tag['href']))

    # Keep only the entries whose alt text mentions V29.
    v29_links = []
    for alt_text, href in found_tags:
        if "V29" not in alt_text and "V 29" not in alt_text:
            continue

        if '/' not in alt_text:
            # No x/y marker: number the entry by its position among kept links.
            v29_links.append((len(v29_links) + 1, None, href))
            continue

        # NOTE(review): every digit-only token before the first "/" is counted
        # (after turning "-" into a space), so an alt text that also carries
        # other numbers yields more than two values and the link is skipped.
        # Confirm this against real alt texts.
        before_slash = alt_text.split('/')[0]
        try:
            # str.isdigit() accepts some characters int() rejects, hence the try.
            numbers = [
                int(token)
                for token in before_slash.replace("-", " ").split()
                if token.isdigit()
            ]
        except ValueError:
            print(f"Failed to parse x/y in alt text: {alt_text}")
            continue

        if len(numbers) == 2:
            x, y = numbers
            if x <= 10 and y <= 10:  # Only consider valid x/y pairs
                v29_links.append((x, y, href))

    # Order the V29 links by their x value.
    v29_links.sort(key=lambda link: link[0])

    if not v29_links:
        # Nothing V29-specific survived: fall back to every matching tag.
        print("No instructions with 'V29' found. Found the following tags:")
        for alt_text, href in found_tags:
            print(f"ALT: {alt_text}, HREF: {href}")
        return found_tags  # Return all links for downloading

    return v29_links
|
|
|
|
|
|
def normalize_alt(alt_text):
    """Turn image alt text into a string that is safe to embed in a file name.

    Slashes, backslashes and spaces become "-". Commas, periods and the
    characters ``: * ? " < > |`` are dropped; the latter group is rejected by
    Windows in file names, and a backslash would act as a path separator there.

    Args:
        alt_text: The alt text to normalize.

    Returns:
        The normalized string (empty if *alt_text* is empty).
    """
    # One translate pass: the first two arguments map character-for-character,
    # the third lists characters to delete.
    table = str.maketrans("/\\ ", "---", ',.:*?"<>|')
    return alt_text.translate(table)
|
|
|
|
|
|
def download_instructions(set_id, links, is_v29=True):
    """Download every instruction file in *links* into the working directory.

    Args:
        set_id: Set identifier used as the file-name prefix.
        links: ``(x, y, href)`` tuples when *is_v29* is true, otherwise
            ``(alt_text, href)`` tuples.
        is_v29: Selects the tuple shape and the naming scheme.

    File names:
        * V29, exactly one link: ``{set_id}.pdf``
        * V29, several links: ``{set_id}+{n}.pdf`` where ``n`` is the 1-based
          position of the link in *links*
        * non-V29: ``{set_id}-{normalize_alt(alt_text)}.pdf``

    A failed download (network error or non-200 status) is printed and the
    remaining links are still processed.
    """
    from urllib.parse import urljoin

    # Same browser-like User-Agent that the instructions page fetch in this
    # file sends, so both requests present themselves consistently.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    for index, link in enumerate(links):
        if is_v29:
            # V29 links have three elements: (x, y, href)
            href = link[2]
            if len(links) == 1:
                # Single link, use the set ID only
                file_name = f"{set_id}.pdf"
            else:
                # Multiple links, number them by their position in the list
                file_name = f"{set_id}+{index + 1}.pdf"
        else:
            # Non-V29 links have two elements: (alt_text, href)
            alt_text, href = link
            normalized_name = normalize_alt(alt_text)
            file_name = f"{set_id}-{normalized_name}.pdf"

        # urljoin leaves an already-absolute href intact and resolves a
        # site-relative one against the Rebrickable origin.
        download_url = urljoin("https://rebrickable.com", href)
        print(f"Downloading: {download_url} -> {file_name}")

        try:
            # The context manager releases the streamed connection even when
            # the body is never read (e.g. on a non-200 status).
            with requests.get(download_url, headers=headers, stream=True, timeout=30) as response:
                if response.status_code != 200:
                    print(f"Failed to download {download_url}. Status code: {response.status_code}")
                    continue
                with open(file_name, 'wb') as file:
                    for chunk in response.iter_content(chunk_size=1024):
                        file.write(chunk)
        except requests.RequestException as exc:
            print(f"Failed to download {download_url}: {exc}")
            continue

        print(f"Saved: {file_name}")
|
|
|
|
# Example usage
|
|
if __name__ == "__main__":
    import argparse

    # Command-line interface: a single positional set ID.
    cli_parser = argparse.ArgumentParser(description="Download LEGO building instructions for a given set ID.")
    cli_parser.add_argument("set_id", type=str, help="The LEGO set ID (e.g., 10313-1)")
    set_id = cli_parser.parse_args().set_id

    instruction_links = get_instruction_links(set_id)

    if not instruction_links:
        print("No instruction links found.")
    else:
        # A 3-tuple first entry marks the V29 result shape (x, y, href);
        # anything else is treated as (alt_text, href).
        first_entry = instruction_links[0]
        looks_like_v29 = isinstance(first_entry, tuple) and len(first_entry) == 3
        if looks_like_v29:
            print(f"Found {len(instruction_links)} V29 instruction links.")
            download_instructions(set_id, instruction_links)
        else:
            print(f"Found {len(instruction_links)} non-V29 instruction links.")
            download_instructions(set_id, instruction_links, is_v29=False)
|