# ComicRack_CompleteMetadata/cr_sync_worker.py

import json, sys, os, zipfile, shutil, tkinter as tk, tempfile, threading, queue, time
from tkinter import ttk
import xml.etree.ElementTree as ET
from concurrent.futures import ThreadPoolExecutor, as_completed

class SyncEngine:
    def __init__(self, mode, job_file):
        """Load the job list, build the progress window and enter the Tk loop.

        Args:
            mode: Operation name; "export" selects do_export, any other
                value selects do_import (see _worker_thread).
            job_file: Path to a JSON file holding the list of books to
                process. Read as utf-8-sig, so a leading BOM is tolerated.

        Note:
            The constructor does not return until the window is closed,
            because it ends by calling ``mainloop()``.
        """
        self.mode = mode
        with open(job_file, 'r', encoding='utf-8-sig') as job_handle:
            self.books = json.load(job_handle)

        # --- Window and widgets (packed top to bottom in this order) ---
        window = tk.Tk()
        window.title(f"CompleteMetadata {mode.upper()}")
        window.geometry("450x150")
        self.root = window

        self.progress = ttk.Progressbar(window, length=400, mode='determinate')
        self.progress.pack(pady=20)

        self.lbl = tk.Label(window, text=f"Ready to {mode} {len(self.books)} books...")
        self.lbl.pack()

        self.timer_lbl = tk.Label(window, text="Elapsed: 0:00")
        self.timer_lbl.pack()

        self.start_btn = tk.Button(window, text="Start", command=self.run)
        self.start_btn.pack(pady=10)

        # --- Elapsed-time bookkeeping; populated once run() is triggered ---
        self.start_time = None
        self.timer_running = False

        # --- Output files live next to this script ---
        script_dir = os.path.dirname(__file__)
        self.results_file = os.path.join(script_dir, "sync_results.json")
        self.log_file = os.path.join(script_dir, "sync_errors.log")
        self.lock_file = os.path.join(script_dir, "sync.lock")

        # Worker -> UI message channel, plus the accumulated outcome
        self.update_queue = queue.Queue()
        self.results = {}
        self.errors = []

        window.mainloop()

    def run(self):
        """Start-button handler: kick off processing and the UI refresh loops.

        Disables the button, resets the result/error accumulators, writes
        this process's PID to the lock file, starts the elapsed-time display,
        launches the background worker thread and begins polling its queue.
        """
        self.start_btn.config(state="disabled")
        self.lbl.config(text="Starting...")
        self.results, self.errors = {}, []

        # The lock file's presence signals that work is in progress
        pid_text = str(os.getpid())
        with open(self.lock_file, 'w') as lock_handle:
            lock_handle.write(pid_text)

        # Elapsed-time display
        self.start_time = time.time()
        self.timer_running = True
        self._update_timer()

        # Heavy lifting happens off the UI thread
        threading.Thread(target=self._worker_thread, daemon=True).start()

        # Drain worker messages from the UI thread
        self._poll_updates()

    def _update_timer(self):
        """Refresh the "Elapsed: m:ss" label and re-arm itself one second later.

        Does nothing (and stops rescheduling) once the timer has been
        switched off or if no start time has been recorded.
        """
        if self.timer_running and self.start_time is not None:
            elapsed = int(time.time() - self.start_time)
            mins = elapsed // 60
            secs = elapsed % 60
            self.timer_lbl.config(text=f"Elapsed: {mins}:{secs:02d}")
            # Tk invokes this method again in 1000 ms
            self.root.after(1000, self._update_timer)

    def _worker_thread(self):
        """Process every book on a small thread pool; runs off the UI thread.

        Each book is handed to do_export (when mode is "export") or do_import
        (any other mode). Successful results are stored in ``self.results``
        keyed by the book's 'ID'; failures are appended to ``self.errors``.
        After each book a ("progress", completed, total) message is queued,
        and a final ("done", None, None) message is always queued, even if
        something unexpected goes wrong, so the UI never polls forever.
        """
        total = len(self.books)
        task = self.do_export if self.mode == "export" else self.do_import
        completed = 0

        try:
            with ThreadPoolExecutor(max_workers=4) as executor:
                futures = {executor.submit(task, b): b for b in self.books}

                for future in as_completed(futures):
                    book = futures[future]
                    try:
                        self.results[book['ID']] = future.result()
                    except Exception as e:
                        # Building the message must not raise itself: a book
                        # entry may be missing 'FilePath' or not be a dict.
                        if isinstance(book, dict):
                            label = book.get('FilePath', '<unknown file>')
                        else:
                            label = repr(book)
                        self.errors.append(f"Error on {label}: {str(e)}")

                    completed += 1
                    # Send progress update to main thread via queue
                    self.update_queue.put(("progress", completed, total))
        except Exception as e:
            # Anything outside the per-book handler (e.g. failure to submit)
            self.errors.append(f"Worker failed: {str(e)}")
        finally:
            # Signal completion unconditionally
            self.update_queue.put(("done", None, None))

    def _poll_updates(self):
        """Drain pending worker messages on the UI thread, then re-arm.

        "progress" messages update the progress bar and status label; a
        "done" message hands over to _finish() and ends the polling chain.
        Otherwise the method schedules itself again in 50 ms.
        """
        while True:
            try:
                msg_type, val1, val2 = self.update_queue.get_nowait()
            except queue.Empty:
                # Nothing more to process for now
                break

            if msg_type == "done":
                self._finish()
                return

            if msg_type == "progress":
                completed, total = val1, val2
                self.progress['value'] = (completed / total) * 100
                self.lbl.config(text=f"Processing {completed}/{total}")

        # Short interval keeps the window responsive
        self.root.after(50, self._poll_updates)

    def _finish(self):
        """Persist results, release the lock file and show the final status.

        Called on the UI thread once the worker has reported "done". The
        results are written as JSON to ``self.results_file``. If that write
        fails, the failure is recorded as an error instead of aborting, and
        the lock file is still removed so nothing waiting on it is left
        hanging. Any collected errors are written to ``self.log_file``.
        """
        self.timer_running = False

        try:
            with open(self.results_file, 'w', encoding='utf-8') as f:
                json.dump(self.results, f)
        except (OSError, TypeError, ValueError) as e:
            # Surface the problem through the normal error report below
            self.errors.append(f"Could not write results file {self.results_file}: {e}")
        finally:
            # Remove lock file to signal completion, whatever happened above
            if os.path.exists(self.lock_file):
                os.unlink(self.lock_file)

        if self.errors:
            with open(self.log_file, 'w', encoding='utf-8') as f:
                f.write("\n".join(self.errors))
            self.lbl.config(text=f"Done with {len(self.errors)} errors. Check log.")
        else:
            self.lbl.config(text="Success! All files processed.")

    def do_export(self, b):
        """Write the metadata in *b* into its CBZ as a root ComicInfo.xml.

        Args:
            b: Book dict. 'FilePath' (the archive to update) is required;
                schema fields, 'Pages' (list of page dicts, each with an
                'Image' key) and 'CustomValuesStore' ("key=value,..." string)
                are optional.

        Returns:
            {"status": "skipped", "reason": "unchanged"} when the archive
            already holds a byte-identical ComicInfo.xml, otherwise
            {"status": "success"} after the archive has been rewritten.

        Raises:
            KeyError: if 'FilePath' (or a page's 'Image') is missing.
            OSError / zipfile.BadZipFile: if the archive cannot be read or
                the replacement cannot be written.

        Notes:
            Fields whose value is None, "" or equal to 0 are omitted (this
            includes False, since ``False == 0``). All entries are re-stored
            uncompressed, and any existing entry named ComicInfo.xml in any
            letter case is dropped.
        """
        # v2.0 Schema element order (Rating added, HasBeenRead as standard element)
        SCHEMA_ORDER = (
            "Title", "Series", "Number", "Count", "Volume",
            "AlternateSeries", "AlternateNumber", "AlternateCount",
            "Summary", "Notes", "Year", "Month", "Day",
            "Writer", "Penciller", "Inker", "Colorist", "Letterer", "CoverArtist", "Editor",
            "Publisher", "Imprint", "Genre", "Web", "PageCount", "LanguageISO", "Format",
            "BlackAndWhite", "Manga", "Characters", "Teams", "Locations", "ScanInformation",
            "StoryArc", "SeriesGroup", "AgeRating", "HasBeenRead", "Pages", "CommunityRating", "Rating",
            "MainCharacterOrTeam", "Review",
        )

        root = ET.Element("ComicInfo", {
            "xmlns:xsd": "http://www.w3.org/2001/XMLSchema",
            "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance"
        })

        # Add elements in schema order; "Pages" is a container, not a text field
        for field in SCHEMA_ORDER:
            if field == "Pages":
                pages_data = b.get("Pages", [])
                if pages_data:
                    pages_el = ET.SubElement(root, "Pages")
                    for p in pages_data:
                        attrs = {"Image": str(p["Image"])}
                        for key in ("ImageWidth", "ImageHeight", "Type"):
                            if p.get(key):
                                # ElementTree can only serialize str attributes
                                attrs[key] = str(p[key])
                        ET.SubElement(pages_el, "Page", attrs)
                continue

            val = b.get(field)
            if val is not None and val != "" and val != 0:
                ET.SubElement(root, field).text = str(val)

        # Add CustomValues section for CR custom fields
        custom_store = b.get("CustomValuesStore", "")
        if custom_store:
            custom_el = ET.SubElement(root, "CustomValues")
            # Parse the comma-separated key=value pairs from CR's CustomValuesStore
            for pair in custom_store.split(","):
                if "=" in pair:
                    key, val = pair.split("=", 1)
                    if key and val:
                        ET.SubElement(custom_el, "CustomValue", {"name": key, "value": val})

        # Pretty print XML
        self._indent_xml(root)
        xml_bytes = ET.tostring(root, encoding='utf-8', xml_declaration=True)

        path = b['FilePath']

        # Check if existing ComicInfo.xml is identical - skip rewrite if so
        try:
            with zipfile.ZipFile(path, 'r') as z:
                if 'ComicInfo.xml' in z.namelist() and z.read('ComicInfo.xml') == xml_bytes:
                    return {"status": "skipped", "reason": "unchanged"}
        except Exception:
            # Best-effort shortcut only: if the archive can't be inspected,
            # fall through and let the rewrite below report the real error.
            pass

        # The temp file is created owner-only; remember the archive's own
        # permission bits so they can be restored after the replace.
        original_mode = os.stat(path).st_mode & 0o7777

        # Build the new archive in the temp directory, then move it over the original
        with tempfile.NamedTemporaryFile(delete=False, suffix='.cbz') as tmp:
            temp_path = tmp.name

        try:
            with zipfile.ZipFile(path, 'r') as zin:
                with zipfile.ZipFile(temp_path, 'w', compression=zipfile.ZIP_STORED) as zout:
                    for item in zin.infolist():
                        if item.filename.lower() == 'comicinfo.xml':
                            continue
                        # Stream data instead of loading entire files into RAM
                        new_info = zipfile.ZipInfo(item.filename, date_time=item.date_time)
                        new_info.compress_type = zipfile.ZIP_STORED
                        with zin.open(item, 'r') as src, zout.open(new_info, 'w') as dst:
                            shutil.copyfileobj(src, dst, length=4 * 1024 * 1024)
                    zout.writestr('ComicInfo.xml', xml_bytes)

            # Move temp to destination (handles cross-drive automatically)
            shutil.move(temp_path, path)
        except BaseException:
            # Clean up temp file on failure, then let the error propagate
            if os.path.exists(temp_path):
                os.unlink(temp_path)
            raise

        try:
            os.chmod(path, original_mode)
        except OSError:
            # The archive itself was written successfully; failing to restore
            # the mode is not worth reporting the whole export as an error.
            pass

        return {"status": "success"}

    def _indent_xml(self, elem, level=0):
        """Insert newline/two-space whitespace in place so the tree pretty-prints.

        Only empty or whitespace-only ``text``/``tail`` values are replaced;
        real text content is left untouched. *level* is the nesting depth of
        *elem* (0 for the root).
        """
        def is_blank(value):
            return not value or not value.strip()

        pad = "\n" + "  " * level

        if not len(elem):
            # Leaf: only its tail matters, and the root's tail is left alone
            if level and is_blank(elem.tail):
                elem.tail = pad
            return

        if is_blank(elem.text):
            elem.text = pad + "  "
        if is_blank(elem.tail):
            elem.tail = pad

        for node in elem:
            self._indent_xml(node, level + 1)

        # The final child's tail sits before this element's closing tag, so
        # it is pulled back to this element's own indentation.
        closing = elem[-1]
        if is_blank(closing.tail):
            closing.tail = pad

    def do_import(self, b):
        """Read the root ComicInfo.xml from a book's archive into a flat dict.

        Args:
            b: Book dict; only 'FilePath' is read.

        Returns:
            A dict mapping each top-level element tag to its text. The
            "Pages" element is skipped, "CustomValues" becomes a
            {name: value} dict under the "CustomValues" key, and a non-empty
            "HasBeenRead" is converted to a bool. Returns {} when the
            archive holds no ComicInfo.xml.

        Raises:
            KeyError: if 'FilePath' is missing.
            OSError / zipfile.BadZipFile: if the archive cannot be opened.
            xml.etree.ElementTree.ParseError: if the XML is malformed.
        """
        path = b['FilePath']
        with zipfile.ZipFile(path, 'r') as z:
            names = z.namelist()
            if 'ComicInfo.xml' in names:
                entry = 'ComicInfo.xml'
            else:
                # do_export treats the name case-insensitively; match that so
                # archives carrying e.g. "comicinfo.xml" are not ignored.
                entry = next((n for n in names if n.lower() == 'comicinfo.xml'), None)
            if entry is None:
                return {}
            root = ET.fromstring(z.read(entry))

        data = {}
        for child in root:
            if child.tag == "Pages":
                # Skip pages for now (complex to import back)
                continue
            if child.tag == "CustomValues":
                # Parse custom values into a dict; entries lacking a name or
                # a non-empty value are dropped
                custom_values = {}
                for cv in child.findall("CustomValue"):
                    name = cv.get("name")
                    value = cv.get("value")
                    if name and value:
                        custom_values[name] = value
                data["CustomValues"] = custom_values
            else:
                data[child.tag] = child.text

        # Convert HasBeenRead from string to boolean
        if data.get("HasBeenRead"):
            data["HasBeenRead"] = data["HasBeenRead"].strip().lower() == "true"

        return data

if __name__ == "__main__":
    # Invocation: <script> <mode> <job_file>. "export" writes metadata into
    # the archives; any other mode value runs the import path.
    if len(sys.argv) < 3:
        # Exit with a readable message instead of an IndexError traceback
        sys.exit(f"usage: {os.path.basename(sys.argv[0])} <export|import> <job_file.json>")
    SyncEngine(sys.argv[1], sys.argv[2])