From 18814e37c78d8d54a794fd522abe925ff155087e Mon Sep 17 00:00:00 2001 From: FrederikBaerentsen Date: Tue, 28 Jan 2025 15:58:34 +0100 Subject: [PATCH 1/3] Script to translate rb colors to bl colors --- rb2bl_colors.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 rb2bl_colors.py diff --git a/rb2bl_colors.py b/rb2bl_colors.py new file mode 100644 index 0000000..622f173 --- /dev/null +++ b/rb2bl_colors.py @@ -0,0 +1,59 @@ +import os +import sys +import json +from pprint import pprint +from typing import Optional, List +from pathlib import Path + +def read_color_data(color_id: str) -> Optional[str]: + """ + Read BrickLink color ID from a JSON file for a given color ID. + + Args: + color_id: The color ID to look up + + Returns: + The BrickLink ID if found, None otherwise + """ + try: + with open(f"colors/{color_id}.json", 'r') as file: + data = json.load(file) + bricklink_data = data['external_ids'].get('BrickLink', {}) + return bricklink_data.get('ext_ids', [None])[0] if bricklink_data else None + except (FileNotFoundError, KeyError, json.JSONDecodeError): + return None + +def process_colors_file(input_file: str, output_file: str) -> None: + """ + Process the colors.csv file and create a new combined file with BrickLink IDs. 
+ + Args: + input_file: Path to the input colors.csv file + output_file: Path to write the output file + """ + new_file: List[str] = [] + + try: + with open(input_file) as file: + lines = file.readlines() + + # Process header + new_file.append('id,name,rgb,is_trans,BrickLink\n') + + # Process data lines + for line in lines[1:]: + id, name, rgb, is_trans = line.strip().split(',') + bricklink_id = read_color_data(id) + new_file.append(f"{id},{name},{rgb},{is_trans},{bricklink_id}\n") + + # Write output file + with open(output_file, "w") as f: + f.writelines(new_file) + + except Exception as e: + print(f"Error processing file: {e}") + +if __name__ == "__main__": + input_file = "colors.csv" + output_file = "colors_combined.csv" + process_colors_file(input_file, output_file) From dcd1f5af5d6dd3bcf32fa68dd4b2df3a96abbcfb Mon Sep 17 00:00:00 2001 From: FrederikBaerentsen Date: Tue, 28 Jan 2025 16:59:01 +0100 Subject: [PATCH 2/3] Grab rb colors and save as json --- .gitignore | 3 ++ README.md | 117 ++++++++++++++++++++++++++++++++++++++++++++- color-processor.py | 110 ++++++++++++++++++++++++++++++++++++++++++ rb_colorgrabber.py | 110 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 338 insertions(+), 2 deletions(-) create mode 100644 color-processor.py create mode 100644 rb_colorgrabber.py diff --git a/.gitignore b/.gitignore index 5d381cc..dc16f1c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +colors/ + + # ---> Python # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/README.md b/README.md index 7350d56..460b397 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,116 @@ -# LegoScripts +# Lego Scripts -Collection of various Lego related scripts \ No newline at end of file +Collection of various Lego related scripts + +## Rebrickable Color Data Processing Scripts + +1. `rb_colorgrabber.py` - Downloads color data from Rebrickable API +2. 
`rb2bl_colors.py` - Processes downloaded color data to extract BrickLink IDs + +### Prerequisites + +- Python 3.7+ +- Required Python packages: + ``` + requests + ``` +- Rebrickable API key (get one from https://rebrickable.com/api/) +- Input file: `colors.csv` from https://rebrickable.com/downloads/ + +### Script Details + +#### rb_colorgrabber.py + +Downloads color data from Rebrickable API and saves it as individual JSON files. + +##### Features: +- Fetches color data from Rebrickable API +- Caches results in JSON files +- Implements rate limiting (10-second delay between requests) +- Handles corrupted cache files +- Provides detailed logging + +##### Usage: +1. Set your API key in the script +2. Ensure `colors.csv` is in the same directory +3. Run: +```bash +python rb_colorgrabber.py +``` + +The script will create a `colors` directory and save individual JSON files for each color ID. + +#### rb2bl_colors.py + +Processes the downloaded JSON files to extract BrickLink IDs and create a combined dataset. + +##### Features: +- Reads color data from JSON files +- Extracts BrickLink IDs +- Creates a combined output file +- Handles missing or corrupt data + +##### Usage: +1. Ensure you have run `rb_colorgrabber.py` first +2. Run: +```bash +python rb2bl_colors.py +``` + +The script will create `colors_combined.csv` with the processed data. + +### File Structure +``` +. +├── colors/ # Directory containing JSON files (created by rb_colorgrabber.py) +│ ├── 1.json +│ ├── 2.json +│ └── ...
+├── colors.csv # Input file from Rebrickable +├── rb_colorgrabber.py # Script to fetch data from API +├── rb2bl_colors.py # Script to process JSON files +└── colors_combined.csv # Output file (created by rb2bl_colors.py) +``` + +### Error Handling + +Both scripts include comprehensive error handling: +- API connection errors +- File read/write errors +- JSON parsing errors +- Missing data handling + +### Logging + +The scripts provide detailed logging information: +- API request status +- File operations +- Error messages +- Processing progress + +### Best Practices + +When using these scripts: +1. Don't modify the JSON files manually +2. Keep the API key secure +3. Respect Rebrickable's rate limits +4. Back up your data before processing + +### Troubleshooting + +Common issues and solutions: +1. API Key errors: + - Verify your API key is correct + - Check your API key permissions + +2. Missing files: + - Ensure `colors.csv` is present + - Run `rb_colorgrabber.py` before `rb2bl_colors.py` + +3. Rate limiting: + - `rb_colorgrabber.py` includes a 10-second delay between requests + - Don't modify this unless you have permission from Rebrickable + +### License + +These scripts are provided as-is under the MIT License.
diff --git a/color-processor.py b/color-processor.py new file mode 100644 index 0000000..7f290fe --- /dev/null +++ b/color-processor.py @@ -0,0 +1,110 @@ +import csv +import json +import requests +import time +from pathlib import Path +from typing import Optional, Dict +from dataclasses import dataclass +import logging +from requests.exceptions import RequestException + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) + +@dataclass +class APIConfig: + """Configuration for the Rebrickable API""" + api_key: str + base_url: str = 'https://rebrickable.com/api/v3/lego/colors/' + rate_limit_delay: int = 10 # seconds between API calls + + @property + def headers(self) -> Dict[str, str]: + return { + 'Accept': 'application/json', + 'Authorization': f'key {self.api_key}' + } + +class ColorDataFetcher: + """Handles fetching and saving of color data from the Rebrickable API""" + + def __init__(self, config: APIConfig, output_dir: Path): + self.config = config + self.output_dir = output_dir + self.output_dir.mkdir(exist_ok=True) + + def fetch_and_save_color(self, color_id: int) -> Optional[Dict]: + """Fetch color data from API and save to JSON file if not already exists""" + if color_id == -1: # Skip unknown color + return None + + json_path = self.output_dir / f'{color_id}.json' + + # Check if file already exists + if json_path.exists(): + try: + with json_path.open('r', encoding='utf-8') as f: + data = json.load(f) + logging.info(f"JSON file for color ID {color_id} already exists, skipping API call") + return data + except json.JSONDecodeError: + logging.warning(f"Corrupted JSON file for color ID {color_id}, will retry API call") + json_path.unlink() + + # Fetch from API + try: + url = f'{self.config.base_url}{color_id}' + response = requests.get(url, headers=self.config.headers) + response.raise_for_status() + + data = response.json() + + # Save to JSON file + with json_path.open('w', encoding='utf-8') 
as f: + json.dump(data, f, indent=2) + + logging.info(f"Successfully fetched and saved color ID {color_id}") + time.sleep(self.config.rate_limit_delay) # Respect rate limits + return data + + except RequestException as e: + logging.error(f"Error fetching data for color ID {color_id}: {e}") + return None + +def process_colors(input_csv: Path, fetcher: ColorDataFetcher) -> None: + """Process colors from CSV and fetch their data""" + try: + # Read color IDs from CSV + with input_csv.open('r', encoding='utf-8') as f: + reader = csv.DictReader(f) + color_ids = [int(row['id']) for row in reader] + + # Fetch and save data for each color + for color_id in color_ids: + fetcher.fetch_and_save_color(color_id) + + except Exception as e: + logging.error(f"Error processing colors: {e}") + raise + +def main(): + """Main entry point""" + try: + api_key = '' # Should be loaded from environment variable or config file + config = APIConfig(api_key=api_key) + fetcher = ColorDataFetcher(config, output_dir=Path('colors')) + + process_colors( + input_csv=Path('colors.csv'), + fetcher=fetcher + ) + + except Exception as e: + logging.error(f"Application error: {e}") + raise + +if __name__ == '__main__': + main() diff --git a/rb_colorgrabber.py b/rb_colorgrabber.py new file mode 100644 index 0000000..7f290fe --- /dev/null +++ b/rb_colorgrabber.py @@ -0,0 +1,110 @@ +import csv +import json +import requests +import time +from pathlib import Path +from typing import Optional, Dict +from dataclasses import dataclass +import logging +from requests.exceptions import RequestException + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) + +@dataclass +class APIConfig: + """Configuration for the Rebrickable API""" + api_key: str + base_url: str = 'https://rebrickable.com/api/v3/lego/colors/' + rate_limit_delay: int = 10 # seconds between API calls + + @property + def headers(self) -> Dict[str, str]: + return { + 'Accept': 
'application/json', + 'Authorization': f'key {self.api_key}' + } + +class ColorDataFetcher: + """Handles fetching and saving of color data from the Rebrickable API""" + + def __init__(self, config: APIConfig, output_dir: Path): + self.config = config + self.output_dir = output_dir + self.output_dir.mkdir(exist_ok=True) + + def fetch_and_save_color(self, color_id: int) -> Optional[Dict]: + """Fetch color data from API and save to JSON file if not already exists""" + if color_id == -1: # Skip unknown color + return None + + json_path = self.output_dir / f'{color_id}.json' + + # Check if file already exists + if json_path.exists(): + try: + with json_path.open('r', encoding='utf-8') as f: + data = json.load(f) + logging.info(f"JSON file for color ID {color_id} already exists, skipping API call") + return data + except json.JSONDecodeError: + logging.warning(f"Corrupted JSON file for color ID {color_id}, will retry API call") + json_path.unlink() + + # Fetch from API + try: + url = f'{self.config.base_url}{color_id}' + response = requests.get(url, headers=self.config.headers) + response.raise_for_status() + + data = response.json() + + # Save to JSON file + with json_path.open('w', encoding='utf-8') as f: + json.dump(data, f, indent=2) + + logging.info(f"Successfully fetched and saved color ID {color_id}") + time.sleep(self.config.rate_limit_delay) # Respect rate limits + return data + + except RequestException as e: + logging.error(f"Error fetching data for color ID {color_id}: {e}") + return None + +def process_colors(input_csv: Path, fetcher: ColorDataFetcher) -> None: + """Process colors from CSV and fetch their data""" + try: + # Read color IDs from CSV + with input_csv.open('r', encoding='utf-8') as f: + reader = csv.DictReader(f) + color_ids = [int(row['id']) for row in reader] + + # Fetch and save data for each color + for color_id in color_ids: + fetcher.fetch_and_save_color(color_id) + + except Exception as e: + logging.error(f"Error processing colors: {e}") 
+ raise + +def main(): + """Main entry point""" + try: + api_key = '' # Should be loaded from environment variable or config file + config = APIConfig(api_key=api_key) + fetcher = ColorDataFetcher(config, output_dir=Path('colors')) + + process_colors( + input_csv=Path('colors.csv'), + fetcher=fetcher + ) + + except Exception as e: + logging.error(f"Application error: {e}") + raise + +if __name__ == '__main__': + main() From b24100d4f78bd6ec370bb109b72984e1f9d28570 Mon Sep 17 00:00:00 2001 From: FrederikBaerentsen Date: Tue, 28 Jan 2025 16:59:55 +0100 Subject: [PATCH 3/3] Moved script --- color-processor.py | 110 --------------------------------------------- 1 file changed, 110 deletions(-) delete mode 100644 color-processor.py diff --git a/color-processor.py b/color-processor.py deleted file mode 100644 index 7f290fe..0000000 --- a/color-processor.py +++ /dev/null @@ -1,110 +0,0 @@ -import csv -import json -import requests -import time -from pathlib import Path -from typing import Optional, Dict -from dataclasses import dataclass -import logging -from requests.exceptions import RequestException - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s' -) - -@dataclass -class APIConfig: - """Configuration for the Rebrickable API""" - api_key: str - base_url: str = 'https://rebrickable.com/api/v3/lego/colors/' - rate_limit_delay: int = 10 # seconds between API calls - - @property - def headers(self) -> Dict[str, str]: - return { - 'Accept': 'application/json', - 'Authorization': f'key {self.api_key}' - } - -class ColorDataFetcher: - """Handles fetching and saving of color data from the Rebrickable API""" - - def __init__(self, config: APIConfig, output_dir: Path): - self.config = config - self.output_dir = output_dir - self.output_dir.mkdir(exist_ok=True) - - def fetch_and_save_color(self, color_id: int) -> Optional[Dict]: - """Fetch color data from API and save to JSON file if not already exists""" - if 
color_id == -1: # Skip unknown color - return None - - json_path = self.output_dir / f'{color_id}.json' - - # Check if file already exists - if json_path.exists(): - try: - with json_path.open('r', encoding='utf-8') as f: - data = json.load(f) - logging.info(f"JSON file for color ID {color_id} already exists, skipping API call") - return data - except json.JSONDecodeError: - logging.warning(f"Corrupted JSON file for color ID {color_id}, will retry API call") - json_path.unlink() - - # Fetch from API - try: - url = f'{self.config.base_url}{color_id}' - response = requests.get(url, headers=self.config.headers) - response.raise_for_status() - - data = response.json() - - # Save to JSON file - with json_path.open('w', encoding='utf-8') as f: - json.dump(data, f, indent=2) - - logging.info(f"Successfully fetched and saved color ID {color_id}") - time.sleep(self.config.rate_limit_delay) # Respect rate limits - return data - - except RequestException as e: - logging.error(f"Error fetching data for color ID {color_id}: {e}") - return None - -def process_colors(input_csv: Path, fetcher: ColorDataFetcher) -> None: - """Process colors from CSV and fetch their data""" - try: - # Read color IDs from CSV - with input_csv.open('r', encoding='utf-8') as f: - reader = csv.DictReader(f) - color_ids = [int(row['id']) for row in reader] - - # Fetch and save data for each color - for color_id in color_ids: - fetcher.fetch_and_save_color(color_id) - - except Exception as e: - logging.error(f"Error processing colors: {e}") - raise - -def main(): - """Main entry point""" - try: - api_key = '' # Should be loaded from environment variable or config file - config = APIConfig(api_key=api_key) - fetcher = ColorDataFetcher(config, output_dir=Path('colors')) - - process_colors( - input_csv=Path('colors.csv'), - fetcher=fetcher - ) - - except Exception as e: - logging.error(f"Application error: {e}") - raise - -if __name__ == '__main__': - main()