Merge pull request 'master' (#1) from master into main

Reviewed-on: #1
This commit is contained in:
FrederikBaerentsen 2025-01-28 17:04:26 +01:00
commit 1c91fc895b
4 changed files with 287 additions and 2 deletions

3
.gitignore vendored
View File

@ -1,3 +1,6 @@
colors/
# ---> Python # ---> Python
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/

115
README.md
View File

@ -1,3 +1,116 @@
# LegoScripts # Lego Scripts
Collection of various Lego related scripts Collection of various Lego related scripts
## Rebrickable Color Data Processing Scripts
1. `rb_colorgrabber.py` - Downloads color data from Rebrickable API
2. `rb2bl_colors.py` - Processes downloaded color data to extract IDs
### Prerequisites
- Python 3.7+
- Required Python packages:
```
requests
```
- Rebrickable API key (get one from https://rebrickable.com/api/)
- Input file: `colors.csv` from https://rebrickable.com/downloads/
### Script Details
#### rb_colorgrabber.py
Downloads color data from Rebrickable API and saves it as individual JSON files.
##### Features:
- Fetches color data from Rebrickable API
- Caches results in JSON files
- Implements rate limiting (10-second delay between requests)
- Handles corrupted cache files
- Provides detailed logging
##### Usage:
1. Set your API key in the script
2. Ensure `colors.csv` is in the same directory
3. Run:
```bash
python rb_colorgrabber.py
```
The script will create a `colors` directory and save individual JSON files for each color ID.
#### rb2bl_colors.py
Processes the downloaded JSON files to extract BrickLink IDs and create a combined dataset.
##### Features:
- Reads color data from JSON files
- Extracts BrickLink IDs
- Creates a combined output file
- Handles missing or corrupt data
##### Usage:
1. Ensure you have run `rb_colorgrabber.py` first
2. Run:
```bash
python rb2bl_colors.py
```
The script will create `colors_combined.csv` with the processed data.
### File Structure
```
.
├── colors/ # Directory containing JSON files (created by rb_colorgrabber.py)
│ ├── 1.json
│ ├── 2.json
│ └── ...
├── colors.csv # Input file from Rebrickable
├── rb_colorgrabber.py # Script to fetch data from API
├── rb2bl_colors.py # Script to process JSON files
└── colors_combined.csv # Output file (created by rb2bl_colors.py)
```
### Error Handling
Both scripts include comprehensive error handling:
- API connection errors
- File read/write errors
- JSON parsing errors
- Missing data handling
### Logging
The scripts provide detailed logging information:
- API request status
- File operations
- Error messages
- Processing progress
### Best Practices
When using these scripts:
1. Don't modify the JSON files manually
2. Keep the API key secure
3. Respect Rebrickable's rate limits
4. Back up your data before processing
### Troubleshooting
Common issues and solutions:
1. API Key errors:
- Verify your API key is correct
- Check your API key permissions
2. Missing files:
- Ensure `colors.csv` is present
- Run `rb_colorgrabber.py` before `rb2bl_colors.py`
3. Rate limiting:
- `rb_colorgrabber.py` includes a 10-second delay between API requests
- Don't modify this unless you have permission from Rebrickable
### License
These scripts are provided as-is under the MIT License.

59
rb2bl_colors.py Normal file
View File

@ -0,0 +1,59 @@
import csv
import json
import os
import sys
from pathlib import Path
from pprint import pprint
from typing import Optional, List
def read_color_data(color_id: str) -> Optional[str]:
    """
    Read the BrickLink color ID from the cached JSON file for a color.

    The file ``colors/<color_id>.json`` (relative to the current working
    directory) is expected to contain an object with an ``external_ids``
    mapping whose ``BrickLink`` entry holds an ``ext_ids`` list. The first
    element of that list is returned exactly as stored in the JSON.

    Args:
        color_id: The color ID to look up

    Returns:
        The first BrickLink ID if present; None when the file is missing,
        cannot be decoded as JSON, or carries no BrickLink ID.
    """
    try:
        with open(f"colors/{color_id}.json", 'r', encoding='utf-8') as file:
            data = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError, UnicodeDecodeError):
        return None

    # Walk the nested structure defensively: any level may be absent, null,
    # or of an unexpected type in a partially populated record.
    external_ids = data.get('external_ids') if isinstance(data, dict) else None
    if not isinstance(external_ids, dict):
        return None

    bricklink_data = external_ids.get('BrickLink')
    if not isinstance(bricklink_data, dict):
        return None

    ext_ids = bricklink_data.get('ext_ids')
    # An empty list must yield None instead of raising IndexError.
    if not isinstance(ext_ids, list) or not ext_ids:
        return None
    return ext_ids[0]
def process_colors_file(input_file: str, output_file: str) -> None:
    """
    Process the colors.csv file and create a new combined file with BrickLink IDs.

    The input is parsed with the ``csv`` module, so quoted names containing
    commas are handled and any columns beyond the first four (id, name, rgb,
    is_trans) are ignored. The input header row is discarded and a fixed
    header is written instead. Blank lines are skipped; lines with fewer
    than four fields are reported and skipped rather than aborting the run.
    A color without a BrickLink ID is written with the text ``None`` in the
    last column.

    The output file is only opened once every input row has been processed,
    so a failure while reading leaves an existing output file untouched.
    Errors are reported on stdout and not re-raised.

    Args:
        input_file: Path to the input colors.csv file
        output_file: Path to write the output file
    """
    rows: List[List[str]] = [['id', 'name', 'rgb', 'is_trans', 'BrickLink']]

    try:
        # newline='' lets the csv module interpret quoting and line endings itself
        with open(input_file, newline='', encoding='utf-8') as file:
            reader = csv.reader(file)
            next(reader, None)  # discard the input header row

            for row in reader:
                if not row:
                    continue  # blank line
                if len(row) < 4:
                    print(f"Skipping malformed line {reader.line_num}: {row}")
                    continue

                color_id, name, rgb, is_trans = row[:4]
                bricklink_id = read_color_data(color_id)
                # str() keeps a missing ID as the text "None" in the output
                rows.append([color_id, name, rgb, is_trans, str(bricklink_id)])

        # lineterminator='\n' combined with default text-mode newline handling
        # yields the same line endings as writing '\n'-terminated strings.
        with open(output_file, "w", encoding='utf-8') as f:
            csv.writer(f, lineterminator='\n').writerows(rows)

    except Exception as e:
        # Script-level boundary: report the problem instead of crashing
        print(f"Error processing file: {e}")
if __name__ == "__main__":
    # Default locations: Rebrickable's colors.csv in, combined CSV out
    input_file, output_file = "colors.csv", "colors_combined.csv"
    process_colors_file(input_file, output_file)

110
rb_colorgrabber.py Normal file
View File

@ -0,0 +1,110 @@
import csv
import json
import requests
import time
from pathlib import Path
from typing import Optional, Dict
from dataclasses import dataclass
import logging
from requests.exceptions import RequestException
# Root logger setup: INFO and above, each record prefixed with time and level
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
@dataclass
class APIConfig:
    """Settings used when talking to the Rebrickable API"""

    # Key placed in the Authorization header
    api_key: str
    # URL prefix of the colors endpoint
    base_url: str = 'https://rebrickable.com/api/v3/lego/colors/'
    # Seconds between API calls
    rate_limit_delay: int = 10

    @property
    def headers(self) -> Dict[str, str]:
        """Return the HTTP headers built from the configured API key"""
        authorization = f'key {self.api_key}'
        return dict(Accept='application/json', Authorization=authorization)
class ColorDataFetcher:
    """Handles fetching and saving of color data from the Rebrickable API

    Each color is stored as ``<output_dir>/<color_id>.json``. A file that
    already exists and parses as JSON is returned directly and no API
    request is made for that color.
    """

    # Seconds to wait on the API before giving up; without a timeout,
    # requests.get() can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, config: "APIConfig", output_dir: Path):
        """Store the configuration and make sure the output directory exists"""
        self.config = config
        self.output_dir = output_dir
        # parents=True so a nested output directory can be created in one go
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def _respect_rate_limit(self) -> None:
        """Pause for the configured delay after an API request"""
        time.sleep(self.config.rate_limit_delay)

    def fetch_and_save_color(self, color_id: int) -> Optional[Dict]:
        """Fetch color data from API and save to JSON file if not already exists

        Returns the parsed color data, or None when the color is skipped
        (ID -1) or the API request fails. Errors raised while writing the
        JSON file are not caught here.
        """
        if color_id == -1:  # Skip unknown color
            return None

        json_path = self.output_dir / f'{color_id}.json'

        # Reuse a previously saved file when it is still readable
        if json_path.exists():
            try:
                with json_path.open('r', encoding='utf-8') as f:
                    data = json.load(f)
                logging.info(f"JSON file for color ID {color_id} already exists, skipping API call")
                return data
            except (json.JSONDecodeError, UnicodeDecodeError):
                logging.warning(f"Corrupted JSON file for color ID {color_id}, will retry API call")
                json_path.unlink()

        # Fetch from API
        url = f'{self.config.base_url}{color_id}'
        try:
            response = requests.get(
                url,
                headers=self.config.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            data = response.json()
        except (RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # response.json() does not raise a RequestException subclass.
            logging.error(f"Error fetching data for color ID {color_id}: {e}")
            # Pause after failures too, so an error response (e.g. HTTP 429)
            # is not followed immediately by the next request.
            self._respect_rate_limit()
            return None

        # Save to JSON file
        with json_path.open('w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

        logging.info(f"Successfully fetched and saved color ID {color_id}")
        self._respect_rate_limit()
        return data
def process_colors(input_csv: Path, fetcher: ColorDataFetcher) -> None:
    """Read every color ID from the CSV, then fetch each one through the fetcher

    All IDs are parsed before the first fetch starts. Any error is logged
    and re-raised.
    """
    try:
        # Collect the integer IDs from the 'id' column
        color_ids = []
        with input_csv.open('r', encoding='utf-8') as handle:
            for record in csv.DictReader(handle):
                color_ids.append(int(record['id']))

        # Fetch and store each color in file order
        for cid in color_ids:
            fetcher.fetch_and_save_color(cid)

    except Exception as exc:
        logging.error(f"Error processing colors: {exc}")
        raise
def main():
    """Main entry point

    The API key is taken from the ``REBRICKABLE_API_KEY`` environment
    variable when it is set; otherwise the in-script fallback value is used.
    Color IDs are read from ``colors.csv`` and the results are stored in the
    ``colors`` directory.

    Raises:
        ValueError: if no API key is configured (the fallback is still the
            placeholder). Any other error is logged and re-raised.
    """
    import os  # local import: os is not part of this module's import block

    placeholder = '<API KEY>'
    # The fallback may be replaced in-script; the environment variable wins.
    fallback_key = '<API KEY>'

    try:
        api_key = os.environ.get('REBRICKABLE_API_KEY') or fallback_key
        if api_key == placeholder:
            # Fail fast instead of sending a placeholder key with every request
            raise ValueError(
                "Rebrickable API key is not configured; "
                "set the REBRICKABLE_API_KEY environment variable"
            )

        config = APIConfig(api_key=api_key)
        fetcher = ColorDataFetcher(config, output_dir=Path('colors'))

        process_colors(
            input_csv=Path('colors.csv'),
            fetcher=fetcher
        )

    except Exception as e:
        logging.error(f"Application error: {e}")
        raise


if __name__ == '__main__':
    main()