master #1
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,3 +1,6 @@
|
|||||||
|
colors/
|
||||||
|
|
||||||
|
|
||||||
# ---> Python
|
# ---> Python
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
115
README.md
115
README.md
@ -1,3 +1,116 @@
|
|||||||
# LegoScripts
|
# Lego Scripts
|
||||||
|
|
||||||
Collection of various Lego related scripts
|
Collection of various Lego related scripts
|
||||||
|
|
||||||
|
## Rebrickable Color Data Processing Scripts
|
||||||
|
|
||||||
|
1. `rb_colorgrabber.py` - Downloads color data from Rebrickable API
|
||||||
|
2. `rb2bl_colors.py` - Processes downloaded color data to extract IDs
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- Python 3.7+
|
||||||
|
- Required Python packages:
|
||||||
|
```
|
||||||
|
requests
|
||||||
|
```
|
||||||
|
- Rebrickable API key (get one from https://rebrickable.com/api/)
|
||||||
|
- Input file: `colors.csv` from https://rebrickable.com/downloads/
|
||||||
|
|
||||||
|
### Script Details
|
||||||
|
|
||||||
|
#### rb_colorgrabber.py
|
||||||
|
|
||||||
|
Downloads color data from Rebrickable API and saves it as individual JSON files.
|
||||||
|
|
||||||
|
##### Features:
|
||||||
|
- Fetches color data from Rebrickable API
|
||||||
|
- Caches results in JSON files
|
||||||
|
- Implements rate limiting (10-second delay between requests)
|
||||||
|
- Handles corrupted cache files
|
||||||
|
- Provides detailed logging
|
||||||
|
|
||||||
|
##### Usage:
|
||||||
|
1. Set your API key in the script
|
||||||
|
2. Ensure `colors.csv` is in the same directory
|
||||||
|
3. Run:
|
||||||
|
```bash
|
||||||
|
python rb_colorgrabber.py
|
||||||
|
```
|
||||||
|
|
||||||
|
The script will create a `colors` directory and save individual JSON files for each color ID.
|
||||||
|
|
||||||
|
#### rb2bl_colors.py
|
||||||
|
|
||||||
|
Processes the downloaded JSON files to extract BrickLink IDs and create a combined dataset.
|
||||||
|
|
||||||
|
##### Features:
|
||||||
|
- Reads color data from JSON files
|
||||||
|
- Extracts BrickLink IDs
|
||||||
|
- Creates a combined output file
|
||||||
|
- Handles missing or corrupt data
|
||||||
|
|
||||||
|
##### Usage:
|
||||||
|
1. Ensure you have run `rb_colorgrabber.py` first
|
||||||
|
2. Run:
|
||||||
|
```bash
|
||||||
|
python rb2bl_colors.py
|
||||||
|
```
|
||||||
|
|
||||||
|
The script will create `colors_combined.csv` with the processed data.
|
||||||
|
|
||||||
|
### File Structure
|
||||||
|
```
|
||||||
|
.
|
||||||
|
├── colors/ # Directory containing JSON files (created by rb_colorgrabber.py)
|
||||||
|
│ ├── 1.json
|
||||||
|
│ ├── 2.json
|
||||||
|
│ └── ...
|
||||||
|
├── colors.csv # Input file from Rebrickable
|
||||||
|
├── rb_colorgrabber.py # Script to fetch data from API
|
||||||
|
├── rb2bl_colors.py # Script to process JSON files
|
||||||
|
└── colors_combined.csv # Output file (created by rb2bl_colors.py)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Error Handling
|
||||||
|
|
||||||
|
Both scripts include comprehensive error handling:
|
||||||
|
- API connection errors
|
||||||
|
- File read/write errors
|
||||||
|
- JSON parsing errors
|
||||||
|
- Missing data handling
|
||||||
|
|
||||||
|
### Logging
|
||||||
|
|
||||||
|
The scripts provide detailed logging information:
|
||||||
|
- API request status
|
||||||
|
- File operations
|
||||||
|
- Error messages
|
||||||
|
- Processing progress
|
||||||
|
|
||||||
|
### Best Practices
|
||||||
|
|
||||||
|
When using these scripts:
|
||||||
|
1. Don't modify the JSON files manually
|
||||||
|
2. Keep the API key secure
|
||||||
|
3. Respect Rebrickable's rate limits
|
||||||
|
4. Back up your data before processing
|
||||||
|
|
||||||
|
### Troubleshooting
|
||||||
|
|
||||||
|
Common issues and solutions:
|
||||||
|
1. API Key errors:
|
||||||
|
- Verify your API key is correct
|
||||||
|
- Check your API key permissions
|
||||||
|
|
||||||
|
2. Missing files:
|
||||||
|
- Ensure `colors.csv` is present
|
||||||
|
- Run `rb_colorgrabber.py` before `rb2bl_colors.py`
|
||||||
|
|
||||||
|
3. Rate limiting:
|
||||||
|
- The scripts include a 10-second delay between requests
|
||||||
|
- Don't modify this unless you have permission from Rebrickable
|
||||||
|
|
||||||
|
### License
|
||||||
|
|
||||||
|
These scripts are provided as-is under the MIT License.
|
||||||
|
110
color-processor.py
Normal file
110
color-processor.py
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
import csv
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, Dict
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import logging
|
||||||
|
from requests.exceptions import RequestException
|
||||||
|
|
||||||
|
# Root logger setup: timestamped records at INFO level and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
|
||||||
|
|
||||||
|
@dataclass
class APIConfig:
    """Connection settings for the Rebrickable colors endpoint."""

    # API key placed in the Authorization header of every request.
    api_key: str
    # URL prefix of the colors endpoint.
    base_url: str = 'https://rebrickable.com/api/v3/lego/colors/'
    # Seconds to pause between API calls.
    rate_limit_delay: int = 10

    @property
    def headers(self) -> Dict[str, str]:
        """Return the HTTP headers for an authenticated JSON request."""
        auth_value = f'key {self.api_key}'
        return {'Accept': 'application/json', 'Authorization': auth_value}
|
||||||
|
|
||||||
|
class ColorDataFetcher:
    """Fetch color records from the Rebrickable API and cache them on disk.

    Each color is stored as ``<color_id>.json`` inside ``output_dir``. A
    readable cache file short-circuits the network call on later runs.
    """

    # Seconds to wait for the server before giving up on a request. Without
    # a timeout, requests.get() can block indefinitely on a stalled
    # connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, config: APIConfig, output_dir: Path):
        """Store the settings and make sure the cache directory exists.

        Args:
            config: API key, base URL and rate-limit delay to use.
            output_dir: Directory that receives one JSON file per color.
        """
        self.config = config
        self.output_dir = output_dir
        # parents=True lets callers pass a nested path such as data/colors.
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def fetch_and_save_color(self, color_id: int) -> Optional[Dict]:
        """Return the data for ``color_id``, using the cache when possible.

        Args:
            color_id: Color ID to look up; ``-1`` (unknown color) is skipped.

        Returns:
            The decoded JSON payload, or ``None`` when the color is skipped
            or the API call / response decoding fails.

        Raises:
            OSError: If the cache file cannot be read, removed or written.
        """
        if color_id == -1:  # Skip unknown color
            return None

        json_path = self.output_dir / f'{color_id}.json'

        # Serve from the cache when a readable file is already present.
        if json_path.exists():
            try:
                with json_path.open('r', encoding='utf-8') as f:
                    data = json.load(f)
                logging.info(f"JSON file for color ID {color_id} already exists, skipping API call")
                return data
            except (json.JSONDecodeError, UnicodeDecodeError):
                # A truncated or non-UTF-8 file is useless: drop it and
                # fall through to a fresh download.
                logging.warning(f"Corrupted JSON file for color ID {color_id}, will retry API call")
                json_path.unlink()

        # Fetch from API
        url = f'{self.config.base_url}{color_id}'
        try:
            response = requests.get(
                url,
                headers=self.config.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            data = response.json()

            # Save to JSON file
            with json_path.open('w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)

            logging.info(f"Successfully fetched and saved color ID {color_id}")
            return data
        except (RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # decode error does not derive from RequestException.
            logging.error(f"Error fetching data for color ID {color_id}: {e}")
            return None
        finally:
            # Pause after every API call, failed ones included, so a run of
            # errors cannot turn into a burst of back-to-back requests.
            time.sleep(self.config.rate_limit_delay)
|
||||||
|
|
||||||
|
def process_colors(input_csv: Path, fetcher: 'ColorDataFetcher') -> None:
    """Fetch and cache the data for every color listed in a CSV file.

    Args:
        input_csv: CSV file whose header row contains an ``id`` column of
            integer color IDs.
        fetcher: Object whose ``fetch_and_save_color`` is called once per
            ID, in file order.

    Raises:
        Exception: Any error raised while reading the CSV or fetching is
            logged and then re-raised unchanged.
    """
    try:
        # utf-8-sig reads plain UTF-8 too, but drops a leading BOM that
        # would otherwise turn the first header into '\ufeffid'.
        # newline='' lets the csv module handle line endings itself.
        with input_csv.open('r', encoding='utf-8-sig', newline='') as f:
            color_ids = [int(row['id']) for row in csv.DictReader(f)]

        # The whole file is parsed before the first request is made, so a
        # malformed row aborts the run before any API call.
        for color_id in color_ids:
            fetcher.fetch_and_save_color(color_id)

    except Exception as e:
        logging.error(f"Error processing colors: {e}")
        raise
|
||||||
|
|
||||||
|
def main():
    """Download the data for every color listed in ``colors.csv``.

    The API key is read from the ``REBRICKABLE_API_KEY`` environment
    variable, falling back to the value hard-coded below. JSON files are
    written to the ``colors`` directory.

    Raises:
        ValueError: If no API key has been configured.
        Exception: Any other failure is logged and re-raised.
    """
    import os  # local import: only main() needs the environment

    try:
        # The environment wins; replace the literal to hard-code a key.
        api_key = os.environ.get('REBRICKABLE_API_KEY') or '<API KEY>'
        if api_key == '<API KEY>':
            # Fail fast instead of sending one rejected request per color.
            raise ValueError(
                "No Rebrickable API key configured: set the "
                "REBRICKABLE_API_KEY environment variable or edit main()"
            )

        config = APIConfig(api_key=api_key)
        fetcher = ColorDataFetcher(config, output_dir=Path('colors'))

        process_colors(
            input_csv=Path('colors.csv'),
            fetcher=fetcher
        )

    except Exception as e:
        logging.error(f"Application error: {e}")
        raise


if __name__ == '__main__':
    main()
|
110
rb_colorgrabber.py
Normal file
110
rb_colorgrabber.py
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
import csv
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, Dict
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import logging
|
||||||
|
from requests.exceptions import RequestException
|
||||||
|
|
||||||
|
# Root logger setup: timestamped records at INFO level and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
|
||||||
|
|
||||||
|
@dataclass
class APIConfig:
    """Connection settings for the Rebrickable colors endpoint."""

    # API key placed in the Authorization header of every request.
    api_key: str
    # URL prefix of the colors endpoint.
    base_url: str = 'https://rebrickable.com/api/v3/lego/colors/'
    # Seconds to pause between API calls.
    rate_limit_delay: int = 10

    @property
    def headers(self) -> Dict[str, str]:
        """Return the HTTP headers for an authenticated JSON request."""
        auth_value = f'key {self.api_key}'
        return {'Accept': 'application/json', 'Authorization': auth_value}
|
||||||
|
|
||||||
|
class ColorDataFetcher:
    """Fetch color records from the Rebrickable API and cache them on disk.

    Each color is stored as ``<color_id>.json`` inside ``output_dir``. A
    readable cache file short-circuits the network call on later runs.
    """

    # Seconds to wait for the server before giving up on a request. Without
    # a timeout, requests.get() can block indefinitely on a stalled
    # connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, config: APIConfig, output_dir: Path):
        """Store the settings and make sure the cache directory exists.

        Args:
            config: API key, base URL and rate-limit delay to use.
            output_dir: Directory that receives one JSON file per color.
        """
        self.config = config
        self.output_dir = output_dir
        # parents=True lets callers pass a nested path such as data/colors.
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def fetch_and_save_color(self, color_id: int) -> Optional[Dict]:
        """Return the data for ``color_id``, using the cache when possible.

        Args:
            color_id: Color ID to look up; ``-1`` (unknown color) is skipped.

        Returns:
            The decoded JSON payload, or ``None`` when the color is skipped
            or the API call / response decoding fails.

        Raises:
            OSError: If the cache file cannot be read, removed or written.
        """
        if color_id == -1:  # Skip unknown color
            return None

        json_path = self.output_dir / f'{color_id}.json'

        # Serve from the cache when a readable file is already present.
        if json_path.exists():
            try:
                with json_path.open('r', encoding='utf-8') as f:
                    data = json.load(f)
                logging.info(f"JSON file for color ID {color_id} already exists, skipping API call")
                return data
            except (json.JSONDecodeError, UnicodeDecodeError):
                # A truncated or non-UTF-8 file is useless: drop it and
                # fall through to a fresh download.
                logging.warning(f"Corrupted JSON file for color ID {color_id}, will retry API call")
                json_path.unlink()

        # Fetch from API
        url = f'{self.config.base_url}{color_id}'
        try:
            response = requests.get(
                url,
                headers=self.config.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            data = response.json()

            # Save to JSON file
            with json_path.open('w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)

            logging.info(f"Successfully fetched and saved color ID {color_id}")
            return data
        except (RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # decode error does not derive from RequestException.
            logging.error(f"Error fetching data for color ID {color_id}: {e}")
            return None
        finally:
            # Pause after every API call, failed ones included, so a run of
            # errors cannot turn into a burst of back-to-back requests.
            time.sleep(self.config.rate_limit_delay)
|
||||||
|
|
||||||
|
def process_colors(input_csv: Path, fetcher: 'ColorDataFetcher') -> None:
    """Fetch and cache the data for every color listed in a CSV file.

    Args:
        input_csv: CSV file whose header row contains an ``id`` column of
            integer color IDs.
        fetcher: Object whose ``fetch_and_save_color`` is called once per
            ID, in file order.

    Raises:
        Exception: Any error raised while reading the CSV or fetching is
            logged and then re-raised unchanged.
    """
    try:
        # utf-8-sig reads plain UTF-8 too, but drops a leading BOM that
        # would otherwise turn the first header into '\ufeffid'.
        # newline='' lets the csv module handle line endings itself.
        with input_csv.open('r', encoding='utf-8-sig', newline='') as f:
            color_ids = [int(row['id']) for row in csv.DictReader(f)]

        # The whole file is parsed before the first request is made, so a
        # malformed row aborts the run before any API call.
        for color_id in color_ids:
            fetcher.fetch_and_save_color(color_id)

    except Exception as e:
        logging.error(f"Error processing colors: {e}")
        raise
|
||||||
|
|
||||||
|
def main():
    """Download the data for every color listed in ``colors.csv``.

    The API key is read from the ``REBRICKABLE_API_KEY`` environment
    variable, falling back to the value hard-coded below. JSON files are
    written to the ``colors`` directory.

    Raises:
        ValueError: If no API key has been configured.
        Exception: Any other failure is logged and re-raised.
    """
    import os  # local import: only main() needs the environment

    try:
        # The environment wins; replace the literal to hard-code a key.
        api_key = os.environ.get('REBRICKABLE_API_KEY') or '<API KEY>'
        if api_key == '<API KEY>':
            # Fail fast instead of sending one rejected request per color.
            raise ValueError(
                "No Rebrickable API key configured: set the "
                "REBRICKABLE_API_KEY environment variable or edit main()"
            )

        config = APIConfig(api_key=api_key)
        fetcher = ColorDataFetcher(config, output_dir=Path('colors'))

        process_colors(
            input_csv=Path('colors.csv'),
            fetcher=fetcher
        )

    except Exception as e:
        logging.error(f"Application error: {e}")
        raise


if __name__ == '__main__':
    main()
|
Loading…
x
Reference in New Issue
Block a user