# File: ComicRack_CompleteMetadata/cr_sync_worker.py
# Snapshot: 2026-01-29 17:23:01 +01:00
#
# 278 lines
# 11 KiB
# Python

import json, sys, os, zipfile, shutil, tkinter as tk, tempfile, threading, queue, time
from tkinter import ttk
import xml.etree.ElementTree as ET
from concurrent.futures import ThreadPoolExecutor, as_completed
class SyncEngine:
    """Tkinter-driven worker that syncs ComicInfo.xml metadata with CBZ files.

    The job file is loaded with ``json.load`` and is expected to be a JSON
    array of book dicts; each book is read via ``b['FilePath']``, ``b['ID']``
    and ``b.get(<field>)``.  In ``"export"`` mode every book's metadata is
    written into its archive as ``ComicInfo.xml``; any other mode value runs
    the import path, which reads ``ComicInfo.xml`` back out of each archive.

    Side files, all placed next to this script:
      * ``sync_results.json`` - per-book results keyed by book ``ID``
      * ``sync_errors.log``   - one line per failed book (only when errors occur)
      * ``sync.lock``         - exists while a run is in progress
    """

    # v2.0 Schema element order (Rating added, HasBeenRead as standard element)
    SCHEMA_ORDER = (
        "Title", "Series", "Number", "Count", "Volume",
        "AlternateSeries", "AlternateNumber", "AlternateCount",
        "Summary", "Notes", "Year", "Month", "Day",
        "Writer", "Penciller", "Inker", "Colorist", "Letterer", "CoverArtist", "Editor",
        "Publisher", "Imprint", "Genre", "Web", "PageCount", "LanguageISO", "Format",
        "BlackAndWhite", "Manga", "Characters", "Teams", "Locations", "ScanInformation",
        "StoryArc", "SeriesGroup", "AgeRating", "HasBeenRead", "Pages", "CommunityRating", "Rating",
        "MainCharacterOrTeam", "Review",
    )

    # Internal job fields that must never be emitted as plain XML elements.
    SKIP_FIELDS = frozenset({"ID", "FilePath", "Pages", "CustomValuesStore"})

    def __init__(self, mode, job_file):
        """Load the job, build the window and enter the Tk main loop.

        Args:
            mode: ``"export"`` to write metadata into archives; anything else
                is treated as import.
            job_file: Path to the JSON job file (read as ``utf-8-sig``).

        Note:
            This call blocks until the window is closed because it runs
            ``mainloop()`` itself.
        """
        self.mode = mode
        with open(job_file, 'r', encoding='utf-8-sig') as f:
            self.books = json.load(f)

        self.root = tk.Tk()
        self.root.title(f"CompleteMetadata {mode.upper()}")
        self.root.geometry("450x150")

        self.progress = ttk.Progressbar(self.root, length=400, mode='determinate')
        self.progress.pack(pady=20)
        self.lbl = tk.Label(self.root, text=f"Ready to {mode} {len(self.books)} books...")
        self.lbl.pack()
        self.timer_lbl = tk.Label(self.root, text="Elapsed: 0:00")
        self.timer_lbl.pack()
        self.start_btn = tk.Button(self.root, text="Start", command=self.run)
        self.start_btn.pack(pady=10)

        self.start_time = None
        self.timer_running = False

        base_dir = os.path.dirname(__file__)
        self.results_file = os.path.join(base_dir, "sync_results.json")
        self.log_file = os.path.join(base_dir, "sync_errors.log")
        self.lock_file = os.path.join(base_dir, "sync.lock")

        # Queue for thread-safe UI updates
        self.update_queue = queue.Queue()
        self.results = {}
        self.errors = []

        self.root.mainloop()

    def run(self):
        """Start a sync run: lock file, timer, background worker and polling."""
        self.start_btn.config(state="disabled")
        self.lbl.config(text="Starting...")
        self.results = {}
        self.errors = []

        # Create lock file to signal we're working
        with open(self.lock_file, 'w') as f:
            f.write(str(os.getpid()))

        # Start timer
        self.start_time = time.time()
        self.timer_running = True
        self._update_timer()

        # Start background worker thread
        worker = threading.Thread(target=self._worker_thread, daemon=True)
        worker.start()

        # Start polling for UI updates
        self._poll_updates()

    def _update_timer(self):
        """Update elapsed time display and re-arm itself once per second."""
        if not self.timer_running or self.start_time is None:
            return
        elapsed = int(time.time() - self.start_time)
        mins, secs = divmod(elapsed, 60)
        self.timer_lbl.config(text=f"Elapsed: {mins}:{secs:02d}")
        self.root.after(1000, self._update_timer)

    @staticmethod
    def _describe_book(book):
        """Return a label for *book* for error messages; never raises."""
        if isinstance(book, dict):
            return book.get('FilePath', '<unknown file>')
        return repr(book)

    def _worker_thread(self):
        """Run all book tasks on a 4-thread pool (background thread).

        Posts ``("progress", completed, total)`` to ``update_queue`` after each
        book and always posts ``("done", None, None)`` at the end, even if the
        worker itself fails, so the UI never polls forever and the lock file
        is always released by ``_finish``.
        """
        total = len(self.books)
        task = self.do_export if self.mode == "export" else self.do_import
        try:
            with ThreadPoolExecutor(max_workers=4) as executor:
                futures = {executor.submit(task, b): b for b in self.books}
                completed = 0
                for future in as_completed(futures):
                    book = futures[future]
                    try:
                        res = future.result()
                        self.results[book['ID']] = res
                    except Exception as e:
                        # Building the message must not raise itself (e.g. a
                        # job entry without 'FilePath'), or the thread dies.
                        self.errors.append(
                            f"Error on {self._describe_book(book)}: {str(e)}"
                        )
                    completed += 1
                    # Send progress update to main thread via queue
                    self.update_queue.put(("progress", completed, total))
        except Exception as e:
            self.errors.append(f"Worker failed: {e}")
        finally:
            # Signal completion
            self.update_queue.put(("done", None, None))

    def _poll_updates(self):
        """Drain queued worker messages on the main thread, then re-schedule."""
        try:
            while True:
                msg_type, val1, val2 = self.update_queue.get_nowait()
                if msg_type == "progress":
                    completed, total = val1, val2
                    self.progress['value'] = (completed / total) * 100
                    self.lbl.config(text=f"Processing {completed}/{total}")
                elif msg_type == "done":
                    self._finish()
                    return  # stop polling once the run is finished
        except queue.Empty:
            pass
        # Keep polling every 50ms - keeps UI responsive
        self.root.after(50, self._poll_updates)

    def _finish(self):
        """Write results, release the lock file and show the final status."""
        self.timer_running = False
        try:
            with open(self.results_file, 'w', encoding='utf-8') as f:
                json.dump(self.results, f)
        except (OSError, TypeError, ValueError) as e:
            self.errors.append(f"Failed to write results: {e}")
        finally:
            # Remove lock file to signal completion; this must happen even when
            # the results could not be written.
            try:
                os.unlink(self.lock_file)
            except FileNotFoundError:
                pass

        if self.errors:
            with open(self.log_file, 'w', encoding='utf-8') as f:
                f.write("\n".join(self.errors))
            self.lbl.config(text=f"Done with {len(self.errors)} errors. Check log.")
        else:
            self.lbl.config(text="Success! All files processed.")

    def do_export(self, b):
        """Write book *b*'s metadata into its CBZ as ``ComicInfo.xml``.

        Args:
            b: Book dict; ``b['FilePath']`` is the archive to update.

        Returns:
            ``{"status": "skipped", "reason": "unchanged"}`` when the archive
            already holds a byte-identical ``ComicInfo.xml``, otherwise
            ``{"status": "success"}``.

        Raises:
            Any error from reading or rewriting the archive; the temporary
            file is removed before the error propagates.
        """
        xml_bytes = self._build_comicinfo_xml(b)

        # Check if existing ComicInfo.xml is identical - skip rewrite if so
        try:
            with zipfile.ZipFile(b['FilePath'], 'r') as z:
                if 'ComicInfo.xml' in z.namelist():
                    if z.read('ComicInfo.xml') == xml_bytes:
                        return {"status": "skipped", "reason": "unchanged"}
        except Exception:
            # Best effort only: if we can't read it, proceed with the write
            # (which reports any real problem with the archive).
            pass

        self._write_comicinfo(b['FilePath'], xml_bytes)
        return {"status": "success"}

    def _build_comicinfo_xml(self, b):
        """Serialize book dict *b* to indented, UTF-8 ComicInfo XML bytes."""
        root = ET.Element("ComicInfo", {
            "xmlns:xsd": "http://www.w3.org/2001/XMLSchema",
            "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
        })

        # Add elements in schema order
        for field in self.SCHEMA_ORDER:
            if field == "Pages":
                self._append_pages(root, b.get("Pages", []))
            elif field not in self.SKIP_FIELDS:
                val = b.get(field)
                # None, empty string and 0 (which also matches False) are omitted.
                if val is not None and val != "" and val != 0:
                    ET.SubElement(root, field).text = str(val)

        # Add CustomValues section for CR custom fields
        custom_store = b.get("CustomValuesStore", "")
        if custom_store:
            custom_el = ET.SubElement(root, "CustomValues")
            # Parse the comma-separated key=value pairs from CR's CustomValuesStore
            for pair in custom_store.split(","):
                if "=" in pair:
                    key, val = pair.split("=", 1)
                    if key and val:
                        ET.SubElement(custom_el, "CustomValue", {"name": key, "value": val})

        # Pretty print XML
        self._indent_xml(root)
        return ET.tostring(root, encoding='utf-8', xml_declaration=True)

    @staticmethod
    def _append_pages(root, pages_data):
        """Append a ``<Pages>`` element to *root* when *pages_data* is non-empty."""
        if not pages_data:
            return
        pages_el = ET.SubElement(root, "Pages")
        for p in pages_data:
            attrs = {"Image": str(p["Image"])}
            # Optional attributes; all are stringified because ElementTree
            # rejects non-string attribute values at serialization time.
            for key in ("ImageWidth", "ImageHeight", "Type"):
                if p.get(key):
                    attrs[key] = str(p[key])
            ET.SubElement(pages_el, "Page", attrs)

    @staticmethod
    def _write_comicinfo(path, xml_bytes):
        """Rebuild the archive at *path* with *xml_bytes* as its ComicInfo.xml.

        Every other entry is streamed into a temporary archive (stored
        uncompressed), any existing ``comicinfo.xml`` in any letter case is
        dropped, and the temporary file is then moved over *path*.
        """
        # Inject into CBZ using optimized streaming approach
        # Temp file lives in the system temp directory, then is moved into place
        with tempfile.NamedTemporaryFile(delete=False, suffix='.cbz') as tmp:
            temp_path = tmp.name
        try:
            with zipfile.ZipFile(path, 'r') as zin:
                with zipfile.ZipFile(temp_path, 'w', compression=zipfile.ZIP_STORED) as zout:
                    for item in zin.infolist():
                        if item.filename.lower() == 'comicinfo.xml':
                            continue
                        # Stream data instead of loading entire files into RAM
                        new_info = zipfile.ZipInfo(item.filename, date_time=item.date_time)
                        new_info.compress_type = zipfile.ZIP_STORED
                        with zin.open(item, 'r') as src:
                            with zout.open(new_info, 'w') as dst:
                                shutil.copyfileobj(src, dst, length=4 * 1024 * 1024)
                    zout.writestr('ComicInfo.xml', xml_bytes)
            # Move temp to destination (handles cross-drive automatically)
            shutil.move(temp_path, path)
        except BaseException:
            # Clean up temp file on failure, then let the error propagate
            if os.path.exists(temp_path):
                os.unlink(temp_path)
            raise

    def _indent_xml(self, elem, level=0):
        """Add pretty-print indentation to XML, in place.

        The exact whitespace matters: ``do_export`` compares the serialized
        bytes with the archive's existing ComicInfo.xml to detect "unchanged".
        """
        indent = "\n" + " " * level
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = indent + " "
            if not elem.tail or not elem.tail.strip():
                elem.tail = indent
            last_child = None
            for child in elem:
                self._indent_xml(child, level + 1)
                last_child = child
            # The last child closes the parent, so it dedents to the parent level.
            if last_child is not None and (not last_child.tail or not last_child.tail.strip()):
                last_child.tail = indent
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = indent

    def do_import(self, b):
        """Read ``ComicInfo.xml`` from book *b*'s archive into a flat dict.

        Args:
            b: Book dict; ``b['FilePath']`` is the archive to read.

        Returns:
            ``{tag: text}`` for each top-level element, with ``CustomValues``
            as a ``{name: value}`` dict, ``Pages`` omitted and a non-empty
            ``HasBeenRead`` converted to ``bool``.  Returns ``{}`` when the
            archive has no ComicInfo entry.
        """
        path = b['FilePath']
        with zipfile.ZipFile(path, 'r') as z:
            names = z.namelist()
            # Prefer the canonical name, but accept any letter case: do_export
            # also treats every casing of the name as the metadata entry.
            if 'ComicInfo.xml' in names:
                entry = 'ComicInfo.xml'
            else:
                entry = next((n for n in names if n.lower() == 'comicinfo.xml'), None)
            if entry is None:
                return {}
            root = ET.fromstring(z.read(entry))

        data = {}
        for child in root:
            if child.tag == "Pages":
                # Skip pages for now (complex to import back)
                continue
            if child.tag == "CustomValues":
                # Parse custom values into a dict
                custom_values = {}
                for cv in child.findall("CustomValue"):
                    name = cv.get("name")
                    value = cv.get("value")
                    if name and value:
                        custom_values[name] = value
                data["CustomValues"] = custom_values
            else:
                data[child.tag] = child.text

        # Convert HasBeenRead from string to boolean
        if data.get("HasBeenRead"):
            data["HasBeenRead"] = data["HasBeenRead"].lower() == "true"
        return data
if __name__ == "__main__":
    # Script entry point: expects <mode> and <job_file> on the command line.
    # Exit with a usage message instead of a raw IndexError when they are missing.
    if len(sys.argv) < 3:
        sys.exit(f"Usage: {os.path.basename(sys.argv[0])} <export|import> <job_file.json>")
    SyncEngine(sys.argv[1], sys.argv[2])