Added selective scanning of a folder or file

2026-01-30 15:37:04 +01:00
parent 492048759c
commit c9dbea6c8b
2 changed files with 520 additions and 4 deletions
@@ -134,10 +134,17 @@ def _count_cbz(root: Path) -> int:
 def _parent_rel(rel: str) -> str:
    return "" if "/" not in rel else rel.rsplit("/", 1)[0]

-def _read_comicinfo(cbz_path: Path) -> Dict[str, Any]:
-    """Lightweight ComicInfo.xml reader."""
+def _read_comicinfo(cbz_path: Path, debug: bool = False) -> Dict[str, Any]:
+    """
+    Lightweight ComicInfo.xml reader.
+    Set debug=True to enable detailed logging of XML parsing.
+    """
    from xml.etree import ElementTree as ET
    meta: Dict[str, Any] = {}
+
+    if debug:
+        app_logger.error(f"[DEBUG] Reading ComicInfo.xml from: {cbz_path}")
+
    try:
        with zipfile.ZipFile(cbz_path, "r") as zf:
            xml_name = None
@@ -145,23 +152,68 @@ def _read_comicinfo(cbz_path: Path) -> Dict[str, Any]:
                if n.lower().endswith("comicinfo.xml") and not n.endswith("/"):
                    xml_name = n
                    break
+
            if not xml_name:
+                if debug:
+                    app_logger.error(f"[DEBUG] No ComicInfo.xml found in {cbz_path.name}")
                return meta
+
+            if debug:
+                app_logger.error(f"[DEBUG] Found ComicInfo.xml at: {xml_name}")
+
            with zf.open(xml_name) as fp:
                tree = ET.parse(fp)
                root = tree.getroot()
+
+                if debug:
+                    app_logger.error(f"[DEBUG] XML root tag: {root.tag}")
+                    app_logger.error(f"[DEBUG] Total XML elements: {len(list(root))}")
+
+                elements_processed = 0
+                elements_skipped = 0
+
                for el in root:
                    k = el.tag.lower()
                    v = (el.text or "").strip()
+
+                    if debug:
+                        if v:
+                            app_logger.error(f"[DEBUG]   ✓ {el.tag} = '{v}' (stored as '{k}')")
+                        else:
+                            app_logger.error(f"[DEBUG]   ✗ {el.tag} = (empty/whitespace) - SKIPPED")
+                            elements_skipped += 1
+
                    if v:
                        meta[k] = v
+                        elements_processed += 1
+
+                if debug:
+                    app_logger.error(f"[DEBUG] Elements processed: {elements_processed}, skipped: {elements_skipped}")
+
+                # Special handling
                if "title" not in meta and "booktitle" in meta:
                    meta["title"] = meta.get("booktitle")
+                    if debug:
+                        app_logger.error(f"[DEBUG] Using 'booktitle' as 'title': {meta['title']}")
+
                for k in ("number", "volume", "year", "month", "day"):
                    if k in meta:
                        meta[k] = meta[k].strip()
-    except Exception:
-        pass
+
+                if debug:
+                    app_logger.error(f"[DEBUG] Final metadata keys: {list(meta.keys())}")
+                    app_logger.error(f"[DEBUG] Metadata to be stored:")
+                    for key, val in meta.items():
+                        app_logger.error(f"[DEBUG]   {key}: {val}")
+
+    except Exception as e:
+        if debug:
+            app_logger.error(f"[DEBUG] Error reading ComicInfo.xml from {cbz_path.name}: {e}")
+            import traceback
+            app_logger.error(f"[DEBUG] Traceback:\n{traceback.format_exc()}")
+        else:
+            app_logger.debug(f"Failed to read ComicInfo.xml from {cbz_path.name}: {e}")
+
    return meta

 def _index_progress(rel: str):
@@ -331,10 +383,361 @@ def _start_scan(force=False):
    t = threading.Thread(target=_run_scan, daemon=True)
    t.start()

+def _rescan_path(rel_path: str):
+    """
+    Rescan a specific file or folder.
+    rel_path: relative path from LIBRARY_DIR (e.g., "folder/comic.cbz" or "folder")
+    """
+    global _LIBRARY_WATCHER
+
+    app_logger.error(f"[DEBUG] === Starting Selective Rescan ===")
+    app_logger.error(f"[DEBUG] Relative path: {rel_path}")
+    app_logger.error(f"[DEBUG] Library directory: {LIBRARY_DIR}")
+
+    # Pause the watcher during rescan to avoid database conflicts
+    watcher_was_running = False
+    if _LIBRARY_WATCHER and _LIBRARY_WATCHER.observer and _LIBRARY_WATCHER.observer.is_alive():
+        app_logger.error("[DEBUG] Pausing filesystem watcher during selective rescan")
+        _LIBRARY_WATCHER.stop()
+        watcher_was_running = True
+
+    conn = db.connect()
+    try:
+        abs_path = LIBRARY_DIR / rel_path
+        app_logger.error(f"[DEBUG] Absolute path: {abs_path}")
+        app_logger.error(f"[DEBUG] Path exists: {abs_path.exists()}")
+
+        if not abs_path.exists():
+            app_logger.error(f"[DEBUG] ERROR: Path does not exist: {abs_path}")
+            return {"success": False, "error": "Path does not exist"}
+
+        rescanned_count = 0
+
+        app_logger.error(f"[DEBUG] Is file: {abs_path.is_file()}")
+        app_logger.error(f"[DEBUG] Is directory: {abs_path.is_dir()}")
+
+        if abs_path.is_file():
+            # Rescan single file
+            app_logger.error(f"[DEBUG] Processing single file: {abs_path.name}")
+            if abs_path.suffix.lower() == ".cbz":
+                st = abs_path.stat()
+                db.upsert_file(
+                    conn,
+                    rel=rel_path,
+                    name=abs_path.stem,
+                    size=st.st_size,
+                    mtime=st.st_mtime,
+                    parent=_parent_rel(rel_path),
+                    ext="cbz",
+                )
+                # Enable debug logging for selective rescan
+                meta = _read_comicinfo(abs_path, debug=True)
+                if meta:
+                    app_logger.error(f"[DEBUG] Upserting metadata for {rel_path} with {len(meta)} fields")
+                    db.upsert_meta(conn, rel=rel_path, meta=meta)
+                    app_logger.error(f"[DEBUG] Metadata upsert completed for {rel_path}")
+                else:
+                    app_logger.error(f"[DEBUG] No metadata extracted from {rel_path}")
+
+                # Update FTS if enabled
+                if db.has_fts5():
+                    text_parts = [
+                        abs_path.stem,
+                        meta.get("title", ""),
+                        meta.get("series", ""),
+                        meta.get("writer", ""),
+                        meta.get("publisher", "")
+                    ]
+                    text = " ".join([p for p in text_parts if p])
+                    conn.execute(
+                        "INSERT OR REPLACE INTO fts(rel, text) VALUES (?, ?)",
+                        (rel_path, text)
+                    )
+
+                rescanned_count = 1
+                app_logger.warning(f"Rescanned file: {rel_path}")
+        else:
+            # Rescan folder recursively
+            for root, dirs, files in os.walk(abs_path, followlinks=True):
+                root_path = Path(root)
+
+                for fn in files:
+                    file_path = root_path / fn
+                    if file_path.suffix.lower() != ".cbz":
+                        continue
+
+                    try:
+                        if not file_path.exists():
+                            continue
+
+                        file_rel = file_path.relative_to(LIBRARY_DIR).as_posix()
+                        st = file_path.stat()
+
+                        db.upsert_file(
+                            conn,
+                            rel=file_rel,
+                            name=file_path.stem,
+                            size=st.st_size,
+                            mtime=st.st_mtime,
+                            parent=_parent_rel(file_rel),
+                            ext="cbz",
+                        )
+                        meta = _read_comicinfo(file_path)
+                        if meta:
+                            db.upsert_meta(conn, rel=file_rel, meta=meta)
+
+                        # Update FTS if enabled
+                        if db.has_fts5():
+                            text_parts = [
+                                file_path.stem,
+                                meta.get("title", ""),
+                                meta.get("series", ""),
+                                meta.get("writer", ""),
+                                meta.get("publisher", "")
+                            ]
+                            text = " ".join([p for p in text_parts if p])
+                            conn.execute(
+                                "INSERT OR REPLACE INTO fts(rel, text) VALUES (?, ?)",
+                                (file_rel, text)
+                            )
+
+                        rescanned_count += 1
+                        app_logger.info(f"Rescanned: {file_rel}")
+                    except Exception as e:
+                        app_logger.error(f"Failed to rescan {file_path}: {e}")
+                        continue
+
+            app_logger.warning(f"Rescanned folder: {rel_path} ({rescanned_count} files)")
+
+        conn.commit()
+        return {"success": True, "rescanned_count": rescanned_count}
+
+    except Exception as e:
+        app_logger.error(f"Rescan error for {rel_path}: {e}")
+        if conn:
+            try:
+                conn.rollback()
+            except Exception:
+                pass
+        return {"success": False, "error": str(e)}
+
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass
+
+        # Resume the watcher if it was running
+        if watcher_was_running and ENABLE_WATCH:
+            app_logger.warning("Resuming filesystem watcher after selective rescan")
+            if _LIBRARY_WATCHER:
+                _LIBRARY_WATCHER.start()
+            else:
+                _LIBRARY_WATCHER = watcher.LibraryWatcher(LIBRARY_DIR)
+                _LIBRARY_WATCHER.start()
+
@app.get("/debug/fts")
 def debug_fts(_=Depends(require_basic)):
    return {"fts5": db.has_fts5()}

+@app.get("/debug/meta-raw")
+def debug_meta_raw(path: str, _=Depends(require_basic)):
+    """Show raw database row from meta table."""
+    conn = db.connect()
+    try:
+        row = conn.execute("SELECT * FROM meta WHERE rel=?", (path,)).fetchone()
+        if not row:
+            return JSONResponse({"error": "No metadata found"}, status_code=404)
+
+        # Convert to dict
+        result = {}
+        for key in row.keys():
+            result[key] = row[key]
+
+        return JSONResponse(result)
+    finally:
+        conn.close()
+
+@app.get("/debug/comic-by-path")
+def debug_comic_by_path(path: str, _=Depends(require_basic)):
+    """Alternative debug endpoint using query parameter instead of path parameter."""
+    app_logger.error(f"[DEBUG] /debug/comic-by-path called with: {path}")
+
+    conn = db.connect()
+    try:
+        row = db.get_item(conn, path)
+
+        if not row:
+            # Try to find similar paths
+            filename = path.split('/')[-1] if '/' in path else path
+            similar = conn.execute(
+                "SELECT rel FROM items WHERE rel LIKE ? AND is_dir=0 LIMIT 10",
+                (f"%{filename}%",)
+            ).fetchall()
+
+            app_logger.error(f"[DEBUG] Comic not found: {path}")
+            if similar:
+                app_logger.error(f"[DEBUG] Similar paths found:")
+                for s in similar:
+                    app_logger.error(f"[DEBUG]   - {s['rel']}")
+
+            return JSONResponse({
+                "error": "Comic not found in database",
+                "searched_path": path,
+                "similar_paths": [s["rel"] for s in similar] if similar else []
+            }, status_code=404)
+
+        # Found it!
+        result = {
+            "file_info": {
+                "rel": row["rel"],
+                "name": row["name"],
+                "parent": row["parent"],
+                "is_dir": row["is_dir"],
+                "size": row["size"],
+                "mtime": row["mtime"],
+                "ext": row["ext"],
+                "added_at": rget(row, "added_at")
+            },
+            "metadata": {}
+        }
+
+        # Get all metadata fields
+        meta_fields = [
+            "title", "series", "number", "volume", "year", "month", "day",
+            "writer", "publisher", "summary", "genre", "tags", "characters",
+            "teams", "locations", "comicvineissue", "format"
+        ]
+
+        for field in meta_fields:
+            try:
+                value = row[field]
+                if value is not None and value != "":
+                    result["metadata"][field] = value
+            except (KeyError, IndexError):
+                pass
+
+        app_logger.error(f"[DEBUG] Found comic, format field: {result['metadata'].get('format', 'NOT SET')}")
+        return JSONResponse(result)
+
+    finally:
+        conn.close()
+
+@app.get("/debug/list-comics")
+def debug_list_comics(limit: int = 20, search: str = None, _=Depends(require_basic)):
+    """List comics in the database with their exact paths and format field."""
+    conn = db.connect()
+    try:
+        if search:
+            rows = conn.execute(
+                """SELECT i.rel, i.name, m.format
+                   FROM items i
+                   LEFT JOIN meta m ON i.rel = m.rel
+                   WHERE i.is_dir=0 AND i.rel LIKE ?
+                   ORDER BY i.rel
+                   LIMIT ?""",
+                (f"%{search}%", limit)
+            ).fetchall()
+        else:
+            rows = conn.execute(
+                """SELECT i.rel, i.name, m.format
+                   FROM items i
+                   LEFT JOIN meta m ON i.rel = m.rel
+                   WHERE i.is_dir=0
+                   ORDER BY i.rel
+                   LIMIT ?""",
+                (limit,)
+            ).fetchall()
+
+        app_logger.error(f"[DEBUG] /debug/list-comics found {len(rows)} comics")
+        for r in rows[:5]:  # Log first 5
+            app_logger.error(f"[DEBUG]   - {r['rel']} (format: {rget(r, 'format', 'NULL')})")
+
+        return JSONResponse({
+            "count": len(rows),
+            "limit": limit,
+            "search": search,
+            "comics": [{"rel": r["rel"], "name": r["name"], "format": rget(r, "format")} for r in rows]
+        })
+    finally:
+        conn.close()
+
+@app.get("/debug/comic/{path:path}")
+def debug_comic(path: str, _=Depends(require_basic)):
+    """Debug endpoint to see what's stored in the database for a specific comic."""
+    from urllib.parse import unquote
+
+    # FastAPI already decodes the path, but let's try both just in case
+    paths_to_try = [path, unquote(path)]
+
+    app_logger.error(f"[DEBUG] Looking up comic in database:")
+    app_logger.error(f"[DEBUG]   Path from FastAPI: {path}")
+    app_logger.error(f"[DEBUG]   Trying paths: {paths_to_try}")
+
+    conn = db.connect()
+    try:
+        row = None
+        for try_path in paths_to_try:
+            row = db.get_item(conn, try_path)
+            if row:
+                app_logger.error(f"[DEBUG] Found comic using path: {try_path}")
+                break
+            else:
+                app_logger.error(f"[DEBUG] Not found with path: {try_path}")
+
+        if not row:
+            # Try to find similar paths
+            similar = conn.execute(
+                "SELECT rel FROM items WHERE rel LIKE ? AND is_dir=0 LIMIT 5",
+                (f"%{decoded_path.split('/')[-1]}%",)
+            ).fetchall()
+
+            app_logger.error(f"[DEBUG] Comic not found in database")
+            if similar:
+                app_logger.error(f"[DEBUG] Similar paths found:")
+                for s in similar:
+                    app_logger.error(f"[DEBUG]   - {s['rel']}")
+
+            return JSONResponse({
+                "error": "Comic not found in database",
+                "searched_path": decoded_path,
+                "similar_paths": [s["rel"] for s in similar] if similar else []
+            }, status_code=404)
+
+        # Convert row to dict
+        result = {
+            "file_info": {
+                "rel": row["rel"],
+                "name": row["name"],
+                "parent": row["parent"],
+                "is_dir": row["is_dir"],
+                "size": row["size"],
+                "mtime": row["mtime"],
+                "ext": row["ext"],
+                "added_at": rget(row, "added_at")
+            },
+            "metadata": {}
+        }
+
+        # Get all metadata fields
+        meta_fields = [
+            "title", "series", "number", "volume", "year", "month", "day",
+            "writer", "publisher", "summary", "genre", "tags", "characters",
+            "teams", "locations", "comicvineissue", "format"
+        ]
+
+        for field in meta_fields:
+            try:
+                value = row[field]
+                if value is not None and value != "":
+                    result["metadata"][field] = value
+            except (KeyError, IndexError):
+                pass
+
+        return JSONResponse(result)
+    finally:
+        conn.close()
+
@app.on_event("startup")
 def startup():
    if not LIBRARY_DIR.exists():
@@ -1253,6 +1656,40 @@ def admin_reindex(_=Depends(require_basic)):
    _start_scan(force=True)
    return JSONResponse({"ok": True, "started": True})

+@app.post("/admin/rescan", response_class=JSONResponse)
+async def admin_rescan_path(request: Request, _=Depends(require_basic)):
+    """
+    Rescan a specific file or folder.
+    Request body: {"path": "relative/path/from/library"}
+    """
+    try:
+        body = await request.json()
+        rel_path = body.get("path", "").strip().strip("/")
+
+        if not rel_path:
+            return JSONResponse({"ok": False, "error": "Path is required"}, status_code=400)
+
+        # Run rescan in background thread to avoid blocking
+        result = {"ok": False}
+
+        def run_rescan():
+            nonlocal result
+            result = _rescan_path(rel_path)
+            result["ok"] = result.get("success", False)
+
+        t = threading.Thread(target=run_rescan, daemon=False)
+        t.start()
+        t.join(timeout=30)  # Wait up to 30 seconds
+
+        if t.is_alive():
+            return JSONResponse({"ok": False, "error": "Rescan timed out"}, status_code=408)
+
+        return JSONResponse(result)
+
+    except Exception as e:
+        app_logger.error(f"Rescan endpoint error: {e}")
+        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
+
@app.post("/admin/thumbs/precache", response_class=JSONResponse)
 def admin_thumbs_precache(_=Depends(require_basic)):
    if _THUMB_STATUS["running"]:
@@ -94,6 +94,33 @@
      </div>
    </div>

+    <!-- Selective Rescan (Collapsible) -->
+    <div class="accordion mb-3" id="rescanAccordion">
+      <div class="accordion-item">
+        <h2 class="accordion-header">
+          <button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#rescanCollapse" aria-expanded="false" aria-controls="rescanCollapse">
+            <i class="bi bi-arrow-clockwise me-2"></i>Selective Rescan
+          </button>
+        </h2>
+        <div id="rescanCollapse" class="accordion-collapse collapse" data-bs-parent="#rescanAccordion">
+          <div class="accordion-body">
+            <p class="small text-secondary mb-2">
+              Rescan a specific file or folder without reindexing the entire library. Useful for fixing metadata issues.
+            </p>
+            <div class="input-group">
+              <input type="text" class="form-control" id="rescanPath"
+                     placeholder="e.g., folder/subfolder or folder/comic.cbz"
+                     aria-label="Path to rescan">
+              <button class="btn btn-primary" type="button" id="rescanBtn">
+                <i class="bi bi-arrow-clockwise me-1"></i> Rescan Path
+              </button>
+            </div>
+            <div id="rescanResult" class="mt-2 d-none"></div>
+          </div>
+        </div>
+      </div>
+    </div>
+
    <!-- KPIs -->
    <div class="row g-3 kpis">
      <div class="col-12 col-md-6 col-xl-3">
@@ -387,6 +414,58 @@
      }
    });

+    // Selective rescan
+    document.getElementById("rescanBtn").addEventListener("click", async () => {
+      const btn = document.getElementById("rescanBtn");
+      const input = document.getElementById("rescanPath");
+      const resultDiv = document.getElementById("rescanResult");
+      const path = input.value.trim();
+
+      if (!path) {
+        resultDiv.className = "mt-2 alert alert-warning";
+        resultDiv.textContent = "Please enter a path to rescan";
+        resultDiv.classList.remove("d-none");
+        return;
+      }
+
+      const original = btn.innerHTML;
+      try {
+        btn.disabled = true;
+        input.disabled = true;
+        btn.innerHTML = '<span class="spinner-border spinner-border-sm me-1"></span> Rescanning…';
+        resultDiv.className = "mt-2 alert alert-info";
+        resultDiv.textContent = "Rescanning " + path + "...";
+        resultDiv.classList.remove("d-none");
+
+        const response = await fetch("/admin/rescan", {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          credentials: "include",
+          body: JSON.stringify({ path: path })
+        });
+
+        const result = await response.json();
+
+        if (result.ok) {
+          resultDiv.className = "mt-2 alert alert-success";
+          resultDiv.innerHTML = `<i class="bi bi-check-circle me-2"></i>Successfully rescanned ${result.rescanned_count || 0} file(s)`;
+          input.value = "";
+          // Refresh stats after rescan
+          setTimeout(load, 500);
+        } else {
+          resultDiv.className = "mt-2 alert alert-danger";
+          resultDiv.innerHTML = `<i class="bi bi-exclamation-triangle me-2"></i>Rescan failed: ${result.error || "Unknown error"}`;
+        }
+      } catch (e) {
+        resultDiv.className = "mt-2 alert alert-danger";
+        resultDiv.innerHTML = `<i class="bi bi-exclamation-triangle me-2"></i>Error: ${e?.message || e}`;
+      } finally {
+        btn.disabled = false;
+        input.disabled = false;
+        btn.innerHTML = original;
+      }
+    });
+
    // NEW: Clean page cache
    async function updateCacheStatus() {
      try{