Merge pull request 'master' (#1) from master into main

Reviewed-on: #1
2025-01-28 17:04:26 +01:00 · 2025-01-28 17:04:26 +01:00 · 1c91fc895b
commit 1c91fc895b
parent c9a0be4a66 b24100d4f7
4 changed files with 287 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,6 @@
 colors/
 # ---> Python
 # Byte-compiled / optimized / DLL files
 __pycache__/
--- a/README.md
+++ b/README.md
@ -1,3 +1,116 @@
-# LegoScripts
+# Lego Scripts
 Collection of various Lego related scripts
 ## Rebrickable Color Data Processing Scripts
 1. `rb_colorgrabber.py` - Downloads color data from Rebrickable API
 2. `rb2bl_colors.py` - Processes downloaded color data to extract IDs
 ### Prerequisites
 - Python 3.7+
 - Required Python packages:
  ```
  requests
  ```
 - Rebrickable API key (get one from https://rebrickable.com/api/)
 - Input file: `colors.csv` from https://rebrickable.com/downloads/
 ### Script Details
 #### rb_colorgrabber.py
 Downloads color data from Rebrickable API and saves it as individual JSON files.
 ##### Features:
 - Fetches color data from Rebrickable API
 - Caches results in JSON files
 - Implements rate limiting (10-second delay between requests)
 - Handles corrupted cache files
 - Provides detailed logging
 ##### Usage:
 1. Set your API key in the script
 2. Ensure `colors.csv` is in the same directory
 3. Run:
 ```bash
 python rb_colorgrabber.py
 ```
 The script will create a `colors` directory and save individual JSON files for each color ID.
 #### rb2bl_colors.py
 Processes the downloaded JSON files to extract BrickLink IDs and create a combined dataset.
 ##### Features:
 - Reads color data from JSON files
 - Extracts BrickLink IDs
 - Creates a combined output file
 - Handles missing or corrupt data
 ##### Usage:
 1. Ensure you have run `rb_colorgrabber.py` first
 2. Run:
 ```bash
 python rb2bl_colors.py
 ```
 The script will create `colors_combined.csv` with the processed data.
 ### File Structure
 ```
 .
 ├── colors/              # Directory containing JSON files (created by rb_colorgrabber.py)
 │   ├── 1.json
 │   ├── 2.json
 │   └── ...
 ├── colors.csv           # Input file from Rebrickable
 ├── rb_colorgrabber.py   # Script to fetch data from API
 ├── rb2bl_colors.py      # Script to process JSON files
 └── colors_combined.csv  # Output file (created by rb2bl_colors.py)
 ```
 ### Error Handling
 Both scripts include comprehensive error handling:
 - API connection errors
 - File read/write errors
 - JSON parsing errors
 - Missing data handling
 ### Logging
 The scripts provide detailed logging information:
 - API request status
 - File operations
 - Error messages
 - Processing progress
 ### Best Practices
 When using these scripts:
 1. Don't modify the JSON files manually
 2. Keep the API key secure
 3. Respect Rebrickable's rate limits
 4. Back up your data before processing
 ### Troubleshooting
 Common issues and solutions:
 1. API Key errors:
   - Verify your API key is correct
   - Check your API key permissions
 2. Missing files:
   - Ensure `colors.csv` is present
   - Run `rb_colorgrabber.py` before `rb2bl_colors.py`
 3. Rate limiting:
   - `rb_colorgrabber.py` waits 10 seconds after each successful API request
   - Don't modify this unless you have permission from Rebrickable
 ### License
 These scripts are provided as-is under the MIT License.
--- a/rb2bl_colors.py
+++ b/rb2bl_colors.py
@ -0,0 +1,59 @@
 import os
 import sys
 import json
 from pprint import pprint
 from typing import Optional, List
 from pathlib import Path
 def read_color_data(color_id: str) -> Optional[str]:
    """
    Read BrickLink color ID from a JSON file for a given color ID.

    The file ``colors/<color_id>.json`` (relative to the current working
    directory) is read, and the first element of its
    ``external_ids -> BrickLink -> ext_ids`` list is returned exactly as it
    is stored in the JSON.

    Args:
        color_id: The color ID to look up
    Returns:
        The BrickLink ID if found, None otherwise (missing or undecodable
        file, missing keys, unexpected structure, or an empty ``ext_ids``
        list)
    """
    try:
        with open(f"colors/{color_id}.json", 'r', encoding='utf-8') as file:
            data = json.load(file)
    except (FileNotFoundError, ValueError):
        # ValueError covers json.JSONDecodeError as well as UnicodeDecodeError
        return None

    try:
        bricklink_data = data['external_ids'].get('BrickLink')
        if not bricklink_data:
            return None
        ext_ids = bricklink_data.get('ext_ids')
    except (KeyError, TypeError, AttributeError):
        # The JSON does not have the expected nesting of objects
        return None

    # An absent or empty list means there is no BrickLink mapping; indexing
    # it would raise and abort processing of every remaining color.
    if not isinstance(ext_ids, list) or not ext_ids:
        return None
    return ext_ids[0]
 def process_colors_file(input_file: str, output_file: str) -> None:
    """
    Process the colors.csv file and create a new combined file with BrickLink IDs.

    The input is parsed with the ``csv`` module, so quoted fields containing
    commas are handled. Only the first four columns (id, name, rgb,
    is_trans) are used; any further columns are ignored. Blank lines are
    skipped, and a row with fewer than four fields is reported and skipped.

    Any error is printed to stdout instead of being raised, in which case
    the output file may not be written.

    Args:
        input_file: Path to the input colors.csv file
        output_file: Path to write the output file
    """
    # Local import so this function does not depend on the module's import list
    import csv

    combined_rows = [['id', 'name', 'rgb', 'is_trans', 'BrickLink']]
    try:
        # newline='' lets the csv module do its own line-ending handling
        with open(input_file, newline='', encoding='utf-8') as source:
            reader = csv.reader(source)
            next(reader, None)  # Drop the input header; a fixed one is written instead
            for row in reader:
                if not any(field.strip() for field in row):
                    continue
                if len(row) < 4:
                    print(f"Skipping malformed row: {row}")
                    continue
                color_id, name, rgb, is_trans = (field.strip() for field in row[:4])
                bricklink_id = read_color_data(color_id)
                # str() keeps the established output format, in which a
                # missing BrickLink ID appears as the literal text "None"
                combined_rows.append([color_id, name, rgb, is_trans, str(bricklink_id)])

        # Write output file
        with open(output_file, "w", newline='', encoding='utf-8') as target:
            csv.writer(target, lineterminator='\n').writerows(combined_rows)
    except Exception as e:
        # Script-level boundary: report the problem rather than crash
        print(f"Error processing file: {e}")
 if __name__ == "__main__":
    # Script entry: both paths are relative to the current working directory
    process_colors_file(input_file="colors.csv", output_file="colors_combined.csv")
--- a/rb_colorgrabber.py
+++ b/rb_colorgrabber.py
@ -0,0 +1,110 @@
 import csv
 import json
 import requests
 import time
 from pathlib import Path
 from typing import Optional, Dict
 from dataclasses import dataclass
 import logging
 from requests.exceptions import RequestException
 # Root logger setup: INFO and above, each record prefixed with time and level
 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
@dataclass
class APIConfig:
    """Configuration for the Rebrickable API

    Holds the API key, the endpoint prefix that a color ID is appended to,
    and the pause applied between API calls.
    """
    # Key sent in the Authorization header of every request
    api_key: str
    # Endpoint prefix; the color ID is appended directly to it
    base_url: str = 'https://rebrickable.com/api/v3/lego/colors/'
    rate_limit_delay: int = 10  # seconds between API calls

    @property
    def headers(self) -> Dict[str, str]:
        """Return the HTTP headers for a request, including the API key."""
        return {
            'Accept': 'application/json',
            'Authorization': f'key {self.api_key}'
        }
 class ColorDataFetcher:
    """Handles fetching and saving of color data from the Rebrickable API

    Each color is stored as ``<output_dir>/<color_id>.json``. A file that is
    already present and parses as JSON is treated as a cache hit and no API
    call is made for that color.
    """

    # Seconds to wait for the API to respond; without a timeout a stalled
    # connection would block the run indefinitely
    request_timeout: float = 30.0

    def __init__(self, config: "APIConfig", output_dir: Path):
        """Store the configuration and make sure the output directory exists."""
        self.config = config
        self.output_dir = output_dir
        # parents=True so a nested output directory can be created in one go
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def fetch_and_save_color(self, color_id: int) -> Optional[Dict]:
        """Fetch color data from API and save to JSON file if not already exists

        Args:
            color_id: The color ID to fetch; -1 (unknown color) is skipped
        Returns:
            The color data (from the cached file or the API), or None when
            the color is skipped or the request fails
        """
        if color_id == -1:  # Skip unknown color
            return None

        json_path = self.output_dir / f'{color_id}.json'

        # Check if file already exists
        if json_path.exists():
            try:
                with json_path.open('r', encoding='utf-8') as f:
                    data = json.load(f)
            except ValueError:
                # Covers json.JSONDecodeError and UnicodeDecodeError: drop the
                # unusable file and fall through to a fresh download
                logging.warning(f"Corrupted JSON file for color ID {color_id}, will retry API call")
                json_path.unlink()
            else:
                logging.info(f"JSON file for color ID {color_id} already exists, skipping API call")
                return data

        # Fetch from API
        try:
            url = f'{self.config.base_url}{color_id}'
            response = requests.get(
                url,
                headers=self.config.headers,
                timeout=self.request_timeout,
            )
            response.raise_for_status()
            data = response.json()

            # Save to JSON file
            with json_path.open('w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)

            logging.info(f"Successfully fetched and saved color ID {color_id}")
            time.sleep(self.config.rate_limit_delay)  # Respect rate limits
            return data
        except (RequestException, ValueError) as e:
            # ValueError: the response body was not valid JSON
            logging.error(f"Error fetching data for color ID {color_id}: {e}")
            return None
 def process_colors(input_csv: Path, fetcher: "ColorDataFetcher") -> None:
    """Process colors from CSV and fetch their data

    All IDs are read from the ``id`` column first, so the CSV file is closed
    before any fetching starts; ``fetcher.fetch_and_save_color`` is then
    called once per ID in file order.

    Args:
        input_csv: Path to a CSV file with a header row containing ``id``
        fetcher: Object whose ``fetch_and_save_color`` handles each color ID
    Raises:
        Exception: Any error is logged and then re-raised unchanged
    """
    try:
        # Read color IDs from CSV. newline='' is what the csv module expects
        # of its file objects; utf-8-sig drops a leading byte-order mark so
        # the first header is 'id' rather than '\ufeffid'.
        with input_csv.open('r', encoding='utf-8-sig', newline='') as f:
            color_ids = [int(row['id']) for row in csv.DictReader(f)]

        # Fetch and save data for each color
        for color_id in color_ids:
            fetcher.fetch_and_save_color(color_id)

    except Exception as e:
        logging.error(f"Error processing colors: {e}")
        raise
 def main():
    """Main entry point

    Builds the API configuration and the fetcher, then processes
    ``colors.csv`` from the current working directory into the ``colors``
    directory. The API key is taken from the ``REBRICKABLE_API_KEY``
    environment variable when it is set and non-empty; otherwise the value
    written in this function is used.

    Raises:
        Exception: Any error is logged and then re-raised unchanged
    """
    # Local import: only this function needs the environment
    import os

    try:
        # Prefer the environment so the key does not have to live in the source
        api_key = os.environ.get('REBRICKABLE_API_KEY') or '<API KEY>'

        config = APIConfig(api_key=api_key)
        fetcher = ColorDataFetcher(config, output_dir=Path('colors'))

        process_colors(
            input_csv=Path('colors.csv'),
            fetcher=fetcher
        )

    except Exception as e:
        logging.error(f"Application error: {e}")
        raise
 # Run the downloader only when this file is executed as a script
 if __name__ == '__main__':
    main()