diff --git a/.gitignore b/.gitignore index 5d381cc..dc16f1c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +colors/ + + # ---> Python # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/README.md b/README.md index 7350d56..460b397 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,116 @@ -# LegoScripts +# Lego Scripts -Collection of various Lego related scripts \ No newline at end of file +Collection of various Lego related scripts + +## Rebrickable Color Data Processing Scripts + +1. `rb_colorgrabber.py` - Downloads color data from Rebrickable API +2. `rb2bl_colors.py` - Processes downloaded color data to extract BrickLink IDs + +### Prerequisites + +- Python 3.7+ +- Required Python packages: + ``` + requests + ``` +- Rebrickable API key (get one from https://rebrickable.com/api/) +- Input file: `colors.csv` from https://rebrickable.com/downloads/ + +### Script Details + +#### rb_colorgrabber.py + +Downloads color data from Rebrickable API and saves it as individual JSON files. + +##### Features: +- Fetches color data from Rebrickable API +- Caches results in JSON files +- Implements rate limiting (10-second delay between requests) +- Handles corrupted cache files +- Provides detailed logging + +##### Usage: +1. Set your API key in the script +2. Ensure `colors.csv` is in the same directory +3. Run: +```bash +python rb_colorgrabber.py +``` + +The script will create a `colors` directory and save individual JSON files for each color ID. + +#### rb2bl_colors.py + +Processes the downloaded JSON files to extract BrickLink IDs and create a combined dataset. + +##### Features: +- Reads color data from JSON files +- Extracts BrickLink IDs +- Creates a combined output file +- Handles missing or corrupt data + +##### Usage: +1. Ensure you have run `rb_colorgrabber.py` first +2. Run: +```bash +python rb2bl_colors.py +``` + +The script will create `colors_combined.csv` with the processed data. + +### File Structure +``` +. 
+├── colors/ # Directory containing JSON files (created by rb_colorgrabber.py) +│ ├── 1.json +│ ├── 2.json +│ └── ... +├── colors.csv # Input file from Rebrickable +├── rb_colorgrabber.py # Script to fetch data from API +├── rb2bl_colors.py # Script to process JSON files +└── colors_combined.txt # Output file (created by rb2bl_colors.py) +``` + +### Error Handling + +Both scripts include comprehensive error handling: +- API connection errors +- File read/write errors +- JSON parsing errors +- Missing data handling + +### Logging + +The scripts provide detailed logging information: +- API request status +- File operations +- Error messages +- Processing progress + +### Best Practices + +When using these scripts: +1. Don't modify the JSON files manually +2. Keep the API key secure +3. Respect Rebrickable's rate limits +4. Back up your data before processing + +### Troubleshooting + +Common issues and solutions: +1. API Key errors: + - Verify your API key is correct + - Check your API key permissions + +2. Missing files: + - Ensure `colors.csv` is present + - Run `rb_colorgrabber.py` before `rb2bl_colors.py` + +3. Rate limiting: + - The scripts include a 10-second delay between requests + - Don't modify this unless you have permission from Rebrickable + +### License + +These scripts are provided as-is under the MIT License. 
diff --git a/color-processor.py b/color-processor.py
new file mode 100644
index 0000000..7f290fe
--- /dev/null
+++ b/color-processor.py
@@ -0,0 +1,151 @@
+import csv
+import json
+import os
+import requests
+import time
+from pathlib import Path
+from typing import Optional, Dict
+from dataclasses import dataclass
+import logging
+from requests.exceptions import RequestException
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+
+@dataclass
+class APIConfig:
+    """Configuration for the Rebrickable API.
+
+    Attributes:
+        api_key: Key sent in the ``Authorization`` header.
+        base_url: URL prefix to which the color ID is appended.
+        rate_limit_delay: Seconds to sleep after each API call.
+        request_timeout: Seconds before a stalled request is aborted.
+    """
+    api_key: str
+    base_url: str = 'https://rebrickable.com/api/v3/lego/colors/'
+    rate_limit_delay: int = 10  # seconds between API calls
+    request_timeout: float = 30.0  # requests waits forever without one
+
+    @property
+    def headers(self) -> Dict[str, str]:
+        """Return the HTTP headers sent with every API request."""
+        return {
+            'Accept': 'application/json',
+            'Authorization': f'key {self.api_key}'
+        }
+
+class ColorDataFetcher:
+    """Handles fetching and saving of color data from the Rebrickable API"""
+
+    def __init__(self, config: APIConfig, output_dir: Path):
+        """Store the configuration and create the output directory."""
+        self.config = config
+        self.output_dir = output_dir
+        # parents=True so a nested output path also works on the first run
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+    def fetch_and_save_color(self, color_id: int) -> Optional[Dict]:
+        """Return the data for one color, fetching it only when not cached.
+
+        Args:
+            color_id: Rebrickable color ID; -1 (unknown color) is skipped.
+
+        Returns:
+            The decoded JSON data, or None when the ID is skipped, the API
+            request fails, or the response body is not valid JSON.
+        """
+        if color_id == -1:  # Skip unknown color
+            return None
+
+        json_path = self.output_dir / f'{color_id}.json'
+
+        # Check if file already exists
+        if json_path.exists():
+            try:
+                with json_path.open('r', encoding='utf-8') as f:
+                    data = json.load(f)
+                logging.info(f"JSON file for color ID {color_id} already exists, skipping API call")
+                return data
+            except (json.JSONDecodeError, UnicodeDecodeError):
+                logging.warning(f"Corrupted JSON file for color ID {color_id}, will retry API call")
+                json_path.unlink()
+
+        # Fetch from API
+        try:
+            url = f'{self.config.base_url}{color_id}'
+            response = requests.get(
+                url,
+                headers=self.config.headers,
+                timeout=self.config.request_timeout
+            )
+            response.raise_for_status()
+
+            data = response.json()
+
+            # Save to JSON file
+            with json_path.open('w', encoding='utf-8') as f:
+                json.dump(data, f, indent=2)
+
+            logging.info(f"Successfully fetched and saved color ID {color_id}")
+            return data
+
+        except (RequestException, ValueError) as e:
+            # ValueError: non-JSON body on requests versions that do not
+            # wrap decoding errors in a RequestException
+            logging.error(f"Error fetching data for color ID {color_id}: {e}")
+            return None
+
+        finally:
+            # Respect rate limits after every request, failed ones included
+            time.sleep(self.config.rate_limit_delay)
+
+def process_colors(input_csv: Path, fetcher: ColorDataFetcher) -> None:
+    """Fetch and cache the data of every color listed in a CSV file.
+
+    Args:
+        input_csv: CSV file with a header row containing an ``id`` column.
+        fetcher: Fetcher used to download and store each color.
+
+    Raises:
+        Exception: Any error is logged and then re-raised unchanged.
+    """
+    try:
+        # Read color IDs from CSV; the csv module expects newline=''
+        with input_csv.open('r', encoding='utf-8', newline='') as f:
+            reader = csv.DictReader(f)
+            color_ids = [int(row['id']) for row in reader]
+
+        # Fetch and save data for each color
+        for color_id in color_ids:
+            fetcher.fetch_and_save_color(color_id)
+
+    except Exception as e:
+        logging.error(f"Error processing colors: {e}")
+        raise
+
+def main():
+    """Main entry point"""
+    try:
+        # Read the key from the environment so it is not committed with the
+        # script; replace the '' default to hard-code it instead
+        api_key = os.environ.get('REBRICKABLE_API_KEY', '')
+        if not api_key:
+            raise ValueError('Rebrickable API key is not set (REBRICKABLE_API_KEY)')
+        config = APIConfig(api_key=api_key)
+        fetcher = ColorDataFetcher(config, output_dir=Path('colors'))
+
+        process_colors(
+            input_csv=Path('colors.csv'),
+            fetcher=fetcher
+        )
+
+    except Exception as e:
+        logging.error(f"Application error: {e}")
+        raise
+
+if __name__ == '__main__':
+    main()
diff --git a/rb_colorgrabber.py b/rb_colorgrabber.py
new file mode 100644
index 0000000..7f290fe
--- /dev/null
+++ b/rb_colorgrabber.py
@@ -0,0 +1,151 @@
+import csv
+import json
+import os
+import requests
+import time
+from pathlib import Path
+from typing import Optional, Dict
+from dataclasses import dataclass
+import logging
+from requests.exceptions import RequestException
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+
+@dataclass
+class APIConfig:
+    """Configuration for the Rebrickable API.
+
+    Attributes:
+        api_key: Key sent in the ``Authorization`` header.
+        base_url: URL prefix to which the color ID is appended.
+        rate_limit_delay: Seconds to sleep after each API call.
+        request_timeout: Seconds before a stalled request is aborted.
+    """
+    api_key: str
+    base_url: str = 'https://rebrickable.com/api/v3/lego/colors/'
+    rate_limit_delay: int = 10  # seconds between API calls
+    request_timeout: float = 30.0  # requests waits forever without one
+
+    @property
+    def headers(self) -> Dict[str, str]:
+        """Return the HTTP headers sent with every API request."""
+        return {
+            'Accept': 'application/json',
+            'Authorization': f'key {self.api_key}'
+        }
+
+class ColorDataFetcher:
+    """Handles fetching and saving of color data from the Rebrickable API"""
+
+    def __init__(self, config: APIConfig, output_dir: Path):
+        """Store the configuration and create the output directory."""
+        self.config = config
+        self.output_dir = output_dir
+        # parents=True so a nested output path also works on the first run
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+    def fetch_and_save_color(self, color_id: int) -> Optional[Dict]:
+        """Return the data for one color, fetching it only when not cached.
+
+        Args:
+            color_id: Rebrickable color ID; -1 (unknown color) is skipped.
+
+        Returns:
+            The decoded JSON data, or None when the ID is skipped, the API
+            request fails, or the response body is not valid JSON.
+        """
+        if color_id == -1:  # Skip unknown color
+            return None
+
+        json_path = self.output_dir / f'{color_id}.json'
+
+        # Check if file already exists
+        if json_path.exists():
+            try:
+                with json_path.open('r', encoding='utf-8') as f:
+                    data = json.load(f)
+                logging.info(f"JSON file for color ID {color_id} already exists, skipping API call")
+                return data
+            except (json.JSONDecodeError, UnicodeDecodeError):
+                logging.warning(f"Corrupted JSON file for color ID {color_id}, will retry API call")
+                json_path.unlink()
+
+        # Fetch from API
+        try:
+            url = f'{self.config.base_url}{color_id}'
+            response = requests.get(
+                url,
+                headers=self.config.headers,
+                timeout=self.config.request_timeout
+            )
+            response.raise_for_status()
+
+            data = response.json()
+
+            # Save to JSON file
+            with json_path.open('w', encoding='utf-8') as f:
+                json.dump(data, f, indent=2)
+
+            logging.info(f"Successfully fetched and saved color ID {color_id}")
+            return data
+
+        except (RequestException, ValueError) as e:
+            # ValueError: non-JSON body on requests versions that do not
+            # wrap decoding errors in a RequestException
+            logging.error(f"Error fetching data for color ID {color_id}: {e}")
+            return None
+
+        finally:
+            # Respect rate limits after every request, failed ones included
+            time.sleep(self.config.rate_limit_delay)
+
+def process_colors(input_csv: Path, fetcher: ColorDataFetcher) -> None:
+    """Fetch and cache the data of every color listed in a CSV file.
+
+    Args:
+        input_csv: CSV file with a header row containing an ``id`` column.
+        fetcher: Fetcher used to download and store each color.
+
+    Raises:
+        Exception: Any error is logged and then re-raised unchanged.
+    """
+    try:
+        # Read color IDs from CSV; the csv module expects newline=''
+        with input_csv.open('r', encoding='utf-8', newline='') as f:
+            reader = csv.DictReader(f)
+            color_ids = [int(row['id']) for row in reader]
+
+        # Fetch and save data for each color
+        for color_id in color_ids:
+            fetcher.fetch_and_save_color(color_id)
+
+    except Exception as e:
+        logging.error(f"Error processing colors: {e}")
+        raise
+
+def main():
+    """Main entry point"""
+    try:
+        # Read the key from the environment so it is not committed with the
+        # script; replace the '' default to hard-code it instead
+        api_key = os.environ.get('REBRICKABLE_API_KEY', '')
+        if not api_key:
+            raise ValueError('Rebrickable API key is not set (REBRICKABLE_API_KEY)')
+        config = APIConfig(api_key=api_key)
+        fetcher = ColorDataFetcher(config, output_dir=Path('colors'))
+
+        process_colors(
+            input_csv=Path('colors.csv'),
+            fetcher=fetcher
+        )
+
+    except Exception as e:
+        logging.error(f"Application error: {e}")
+        raise
+
+if __name__ == '__main__':
+    main()