Compare commits
2 Commits
e2324d9280
...
a882dfc743
Author | SHA1 | Date | |
---|---|---|---|
a882dfc743 | |||
2232f289f0 |
79
README.md
79
README.md
@ -1,3 +1,78 @@
|
||||
# InstructionsDownloader
|
||||
# Instruction Downloader Script
|
||||
|
||||
Script to download LEGO instructions
|
||||
This Python script is designed to download LEGO building instructions for a given set ID from the Rebrickable website. It will search for all available instruction links related to a set, and based on the presence of a "V29" (or "V 29") tag in the alt text, it will download specific versions of the instructions.
|
||||
|
||||
## Features
|
||||
|
||||
- **Download Specific Versions**: Automatically handles downloading of instructions based on the `V29` version.
|
||||
- **Handle Complex Alt Text**: The script handles alt text that includes variations like `V29`, including `x/y` patterns and ignores invalid patterns.
|
||||
- **Fallback for Missing or Invalid V29 Versions**: If no valid V29 instruction links are found, the script will download all available instructions.
|
||||
- **Customizable Download Filenames**: Instructions are saved with filenames based on the set ID and the instruction variant.
|
||||
- **User-Agent Spoofing**: The script mimics a browser request to avoid being blocked as a bot.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python 3.x
|
||||
- `requests` library
|
||||
- `beautifulsoup4` library
|
||||
|
||||
You can install the required dependencies by running:
|
||||
|
||||
```bash
|
||||
python3 -m pip install requests beautifulsoup4
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Running the Script
|
||||
|
||||
To use this script, you need to provide the LEGO set ID as an argument. The set ID is typically in the format `####-1` (e.g., `10313-1`).
|
||||
|
||||
### Example Command
|
||||
|
||||
```bash
|
||||
python3 instDownloader.py 10313-1
|
||||
```
|
||||
|
||||
This will download the LEGO building instructions for the set `10313-1` (Wildflower Bouquet).
|
||||
|
||||
### How It Works
|
||||
|
||||
1. The script will visit the Rebrickable page for the provided set ID.
|
||||
2. It will search for all available instruction links containing "LEGO Building Instructions" in the `alt` text.
|
||||
3. If links with "V29" are found, the script will download them in order, naming them `{set_id}+1.pdf`, `{set_id}+2.pdf`, etc. If there is only one such link, it is saved as `{set_id}.pdf`.
|
||||
4. If the `x/y` values in the `alt` text are invalid (i.e., `x` or `y` greater than 10), those links will be ignored.
|
||||
5. If no valid `V29` links are found, the script will fall back to downloading all available instruction links, with filenames based on the alt text.
|
||||
|
||||
## Script Output
|
||||
|
||||
The script will print status messages during the execution, including:
|
||||
|
||||
- URLs of the files being downloaded.
|
||||
- The filenames that the instructions will be saved as.
|
||||
- Any errors encountered while downloading.
|
||||
|
||||
## Example Output
|
||||
|
||||
```bash
|
||||
Downloading: https://rebrickable.com/instructions/10313-1/123456/download/?expire=1737409263 -> 10313-1+1.pdf
Saved: 10313-1+1.pdf
Downloading: https://rebrickable.com/instructions/10313-1/789012/download/?expire=1737409263 -> 10313-1+2.pdf
Saved: 10313-1+2.pdf
|
||||
```
|
||||
|
||||
If no valid `V29` links are found, the script will print the available instruction links and then download all of them.
|
||||
|
||||
## Error Handling
|
||||
|
||||
- If the page cannot be loaded (e.g., invalid set ID), the script will print an error message and stop.
|
||||
- If an instruction download fails, the script will print an error message with the HTTP status code.
|
||||
|
||||
## Customization
|
||||
|
||||
- **User-Agent**: The script uses a browser-like User-Agent string to avoid being detected as a bot. You can customize the `headers` variable if needed.
|
||||
- **Download Filename Format**: The filenames for the downloaded instructions are automatically generated. You can modify the `download_instructions` function to change the naming convention.
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
116
instDownloader.py
Normal file
116
instDownloader.py
Normal file
@ -0,0 +1,116 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
def _parse_booklet_position(alt_text):
    """Return the first valid ``(x, y)`` pair written as ``x/y`` in *alt_text*.

    A pair is valid when both numbers are at most 10; larger numbers are
    treated as unrelated digits and skipped. Returns ``None`` when the text
    holds no valid pair.
    """
    import re  # local import keeps this block self-contained

    # Read the numbers on BOTH sides of each slash; ASCII digits only so that
    # int() can never fail on what the pattern matched.
    for x_text, y_text in re.findall(r"([0-9]+)\s*/\s*([0-9]+)", alt_text):
        x, y = int(x_text), int(y_text)
        if x <= 10 and y <= 10:  # Only consider valid x/y pairs
            return x, y
    return None


def get_instruction_links(set_id):
    """Collect instruction download links for *set_id* from Rebrickable.

    Args:
        set_id: The set ID used to build the instructions page URL
            (e.g. ``"10313-1"``).

    Returns:
        One of three shapes:

        * a list of ``(x, y, href)`` tuples sorted by ``x`` when at least one
          "V29" / "V 29" link qualifies (``y`` is ``None`` for links whose alt
          text has no ``/``, which are numbered sequentially);
        * otherwise a list of ``(alt_text, href)`` tuples for every
          instruction link found on the page (possibly empty);
        * an empty list when the page could not be loaded.
    """
    # Construct the URL
    url = f"https://rebrickable.com/instructions/{set_id}"

    # Load the page with a browser-like User-Agent
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        # A timeout prevents the script from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to load page: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to load page. Status code: {response.status_code}")
        return []

    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Collect every <a href> whose nested <img> has "LEGO Building Instructions"
    # in its alt attribute, remembering both the alt text and the href.
    found_tags = []
    for a_tag in soup.find_all('a', href=True):
        img_tag = a_tag.find('img', alt=True)
        if img_tag and "LEGO Building Instructions" in img_tag['alt']:
            found_tags.append((img_tag['alt'], a_tag['href']))

    # Filter links containing "V29"
    v29_links = []
    for alt_text, href in found_tags:
        if "V29" not in alt_text and "V 29" not in alt_text:
            continue
        if '/' in alt_text:
            # A slash without any valid x/y pair means the link is ignored.
            position = _parse_booklet_position(alt_text)
            if position is not None:
                x, y = position
                v29_links.append((x, y, href))
        else:
            # Assign sequential numbers if no x/y format is found
            v29_links.append((len(v29_links) + 1, None, href))

    # Sort the V29 links by x value
    v29_links.sort(key=lambda link: link[0])

    # If no V29 links found, report and return all found tags
    if not v29_links:
        print("No instructions with 'V29' found. Found the following tags:")
        for alt_text, href in found_tags:
            print(f"ALT: {alt_text}, HREF: {href}")
        return found_tags  # Return all links for downloading

    return v29_links
|
||||
|
||||
|
||||
def normalize_alt(alt_text):
    """Convert alt text into a file-friendly name fragment.

    Slashes and spaces are turned into hyphens; commas and periods are
    dropped. All other characters pass through unchanged.
    """
    # A single translation table applies all four character rules in one pass.
    substitutions = str.maketrans({"/": "-", " ": "-", ",": None, ".": None})
    return alt_text.translate(substitutions)
|
||||
|
||||
|
||||
def download_instructions(set_id, links, is_v29=True):
    """Download every link in *links* into the current working directory.

    Args:
        set_id: Set ID used as the prefix of each output file name.
        links: When *is_v29* is true, a list of ``(x, y, href)`` tuples;
            otherwise a list of ``(alt_text, href)`` tuples.
        is_v29: Selects the tuple layout and the naming scheme. V29 files are
            named ``{set_id}.pdf`` for a single link, or ``{set_id}+N.pdf``
            by position in *links* when there are several. Other files are
            named ``{set_id}-{normalized alt text}.pdf``.

    Failures are reported with ``print`` and do not stop the remaining
    downloads.
    """
    from urllib.parse import urljoin  # local import keeps this block self-contained

    # Send the same browser-like User-Agent as the page request, so the
    # download is not treated differently from the page load.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    for index, link in enumerate(links):
        if is_v29:
            # V29 links have three elements: (x, y, href)
            href = link[2]
            if len(links) == 1:
                # Single link, use the set ID only
                file_name = f"{set_id}.pdf"
            else:
                # Multiple links, number them by their position in the list
                file_name = f"{set_id}+{index + 1}.pdf"
        else:
            # Non-V29 links have two elements: (alt_text, href)
            alt_text, href = link
            normalized_name = normalize_alt(alt_text)
            file_name = f"{set_id}-{normalized_name}.pdf"

        # urljoin handles site-relative hrefs as well as absolute URLs.
        download_url = urljoin("https://rebrickable.com", href)
        print(f"Downloading: {download_url} -> {file_name}")

        # Fetch the file; the context manager releases the streamed connection
        # even when the status is not 200 or the body is only partly read.
        try:
            with requests.get(download_url, headers=headers, stream=True, timeout=30) as response:
                if response.status_code != 200:
                    print(f"Failed to download {download_url}. Status code: {response.status_code}")
                    continue
                with open(file_name, 'wb') as file:
                    for chunk in response.iter_content(chunk_size=1024):
                        file.write(chunk)
        except requests.RequestException as exc:
            print(f"Failed to download {download_url}: {exc}")
            continue
        print(f"Saved: {file_name}")
|
||||
|
||||
# Example usage
|
||||
if __name__ == "__main__":
    import argparse

    # Command line: one positional argument, the set ID to look up.
    cli = argparse.ArgumentParser(description="Download LEGO building instructions for a given set ID.")
    cli.add_argument("set_id", type=str, help="The LEGO set ID (e.g., 10313-1)")
    set_id = cli.parse_args().set_id

    instruction_links = get_instruction_links(set_id)

    if not instruction_links:
        print("No instruction links found.")
    else:
        # V29 results are 3-tuples (x, y, href); the fallback results are
        # 2-tuples (alt_text, href). The first entry tells the two apart.
        first_entry = instruction_links[0]
        found_v29 = isinstance(first_entry, tuple) and len(first_entry) == 3
        if found_v29:
            print(f"Found {len(instruction_links)} V29 instruction links.")
            download_instructions(set_id, instruction_links)
        else:
            print(f"Found {len(instruction_links)} non-V29 instruction links.")
            download_instructions(set_id, instruction_links, is_v29=False)
|
Loading…
x
Reference in New Issue
Block a user