Added selective scanning of a folder or file

This commit is contained in:
2026-01-30 15:37:04 +01:00
parent 492048759c
commit c9dbea6c8b
2 changed files with 520 additions and 4 deletions
+441 -4
View File
@@ -134,10 +134,17 @@ def _count_cbz(root: Path) -> int:
def _parent_rel(rel: str) -> str:
return "" if "/" not in rel else rel.rsplit("/", 1)[0]
def _read_comicinfo(cbz_path: Path) -> Dict[str, Any]:
"""Lightweight ComicInfo.xml reader."""
def _read_comicinfo(cbz_path: Path, debug: bool = False) -> Dict[str, Any]:
"""
Lightweight ComicInfo.xml reader.
Set debug=True to enable detailed logging of XML parsing.
"""
from xml.etree import ElementTree as ET
meta: Dict[str, Any] = {}
if debug:
app_logger.error(f"[DEBUG] Reading ComicInfo.xml from: {cbz_path}")
try:
with zipfile.ZipFile(cbz_path, "r") as zf:
xml_name = None
@@ -145,23 +152,68 @@ def _read_comicinfo(cbz_path: Path) -> Dict[str, Any]:
if n.lower().endswith("comicinfo.xml") and not n.endswith("/"):
xml_name = n
break
if not xml_name:
if debug:
app_logger.error(f"[DEBUG] No ComicInfo.xml found in {cbz_path.name}")
return meta
if debug:
app_logger.error(f"[DEBUG] Found ComicInfo.xml at: {xml_name}")
with zf.open(xml_name) as fp:
tree = ET.parse(fp)
root = tree.getroot()
if debug:
app_logger.error(f"[DEBUG] XML root tag: {root.tag}")
app_logger.error(f"[DEBUG] Total XML elements: {len(list(root))}")
elements_processed = 0
elements_skipped = 0
for el in root:
k = el.tag.lower()
v = (el.text or "").strip()
if debug:
if v:
app_logger.error(f"[DEBUG] ✓ {el.tag} = '{v}' (stored as '{k}')")
else:
app_logger.error(f"[DEBUG] ✗ {el.tag} = (empty/whitespace) - SKIPPED")
elements_skipped += 1
if v:
meta[k] = v
elements_processed += 1
if debug:
app_logger.error(f"[DEBUG] Elements processed: {elements_processed}, skipped: {elements_skipped}")
# Special handling
if "title" not in meta and "booktitle" in meta:
meta["title"] = meta.get("booktitle")
if debug:
app_logger.error(f"[DEBUG] Using 'booktitle' as 'title': {meta['title']}")
for k in ("number", "volume", "year", "month", "day"):
if k in meta:
meta[k] = meta[k].strip()
except Exception:
pass
if debug:
app_logger.error(f"[DEBUG] Final metadata keys: {list(meta.keys())}")
app_logger.error(f"[DEBUG] Metadata to be stored:")
for key, val in meta.items():
app_logger.error(f"[DEBUG] {key}: {val}")
except Exception as e:
if debug:
app_logger.error(f"[DEBUG] Error reading ComicInfo.xml from {cbz_path.name}: {e}")
import traceback
app_logger.error(f"[DEBUG] Traceback:\n{traceback.format_exc()}")
else:
app_logger.debug(f"Failed to read ComicInfo.xml from {cbz_path.name}: {e}")
return meta
def _index_progress(rel: str):
@@ -331,10 +383,361 @@ def _start_scan(force=False):
t = threading.Thread(target=_run_scan, daemon=True)
t.start()
def _rescan_cbz_file(conn, file_path: Path, file_rel: str, debug: bool = False) -> None:
    """
    Refresh the database rows for one .cbz file on an already-open connection.

    Upserts the file row, the ComicInfo.xml metadata (when any was read) and,
    if FTS5 is available, the full-text-search entry. The caller owns the
    transaction: nothing is committed here.

    conn:      open database connection
    file_path: absolute path of the .cbz on disk
    file_rel:  posix-style path relative to LIBRARY_DIR, used as the row key
    debug:     forwarded to _read_comicinfo and enables the verbose log lines
    """
    st = file_path.stat()
    db.upsert_file(
        conn,
        rel=file_rel,
        name=file_path.stem,
        size=st.st_size,
        mtime=st.st_mtime,
        parent=_parent_rel(file_rel),
        ext="cbz",
    )

    meta = _read_comicinfo(file_path, debug=debug)
    if meta:
        if debug:
            app_logger.error(f"[DEBUG] Upserting metadata for {file_rel} with {len(meta)} fields")
        db.upsert_meta(conn, rel=file_rel, meta=meta)
        if debug:
            app_logger.error(f"[DEBUG] Metadata upsert completed for {file_rel}")
    elif debug:
        app_logger.error(f"[DEBUG] No metadata extracted from {file_rel}")

    # Update FTS if enabled
    if db.has_fts5():
        text_parts = [
            file_path.stem,
            meta.get("title", ""),
            meta.get("series", ""),
            meta.get("writer", ""),
            meta.get("publisher", ""),
        ]
        text = " ".join(p for p in text_parts if p)
        # NOTE(review): if `fts` is an FTS5 virtual table, OR REPLACE only
        # resolves rowid conflicts, so repeated rescans may leave duplicate
        # rows for the same rel -- confirm against the schema.
        conn.execute(
            "INSERT OR REPLACE INTO fts(rel, text) VALUES (?, ?)",
            (file_rel, text),
        )


def _rescan_path(rel_path: str):
    """
    Rescan a specific file or folder.

    rel_path: relative path from LIBRARY_DIR (e.g., "folder/comic.cbz" or "folder")

    Returns {"success": True, "rescanned_count": int} on success, otherwise
    {"success": False, "error": str}. Absolute paths and paths containing ".."
    are rejected so a request cannot reach outside the library. The filesystem
    watcher is paused while the rescan runs and resumed afterwards.
    """
    global _LIBRARY_WATCHER

    app_logger.error(f"[DEBUG] === Starting Selective Rescan ===")
    app_logger.error(f"[DEBUG] Relative path: {rel_path}")
    app_logger.error(f"[DEBUG] Library directory: {LIBRARY_DIR}")

    # Validate lexically (no resolve()) so symlinked folders inside the
    # library keep working, matching os.walk(..., followlinks=True) below.
    rel = Path(rel_path)
    if rel.is_absolute() or ".." in rel.parts:
        app_logger.error(f"Rescan rejected, path escapes the library: {rel_path}")
        return {"success": False, "error": "Path must be inside the library"}
    # Normalised key ("a//b" -> "a/b"), same form the folder branch stores.
    rel_posix = rel.as_posix()

    # Pause the watcher during rescan to avoid database conflicts
    watcher_was_running = False
    if _LIBRARY_WATCHER and _LIBRARY_WATCHER.observer and _LIBRARY_WATCHER.observer.is_alive():
        app_logger.error("[DEBUG] Pausing filesystem watcher during selective rescan")
        _LIBRARY_WATCHER.stop()
        watcher_was_running = True

    conn = None
    try:
        # Connect inside the try so a connection failure still reaches the
        # finally block and the watcher is resumed.
        conn = db.connect()

        abs_path = LIBRARY_DIR / rel
        app_logger.error(f"[DEBUG] Absolute path: {abs_path}")
        app_logger.error(f"[DEBUG] Path exists: {abs_path.exists()}")

        if not abs_path.exists():
            app_logger.error(f"[DEBUG] ERROR: Path does not exist: {abs_path}")
            return {"success": False, "error": "Path does not exist"}

        rescanned_count = 0
        app_logger.error(f"[DEBUG] Is file: {abs_path.is_file()}")
        app_logger.error(f"[DEBUG] Is directory: {abs_path.is_dir()}")

        if abs_path.is_file():
            # Rescan single file
            app_logger.error(f"[DEBUG] Processing single file: {abs_path.name}")
            if abs_path.suffix.lower() == ".cbz":
                # Enable debug logging for selective rescan
                _rescan_cbz_file(conn, abs_path, rel_posix, debug=True)
                rescanned_count = 1
                app_logger.warning(f"Rescanned file: {rel_posix}")
            else:
                app_logger.warning(f"Skipped non-CBZ file: {rel_posix}")
        else:
            # Rescan folder recursively
            for root, _dirs, files in os.walk(abs_path, followlinks=True):
                root_path = Path(root)
                for fn in files:
                    file_path = root_path / fn
                    if file_path.suffix.lower() != ".cbz":
                        continue
                    try:
                        # Skips dangling symlinks that os.walk still lists.
                        if not file_path.exists():
                            continue
                        file_rel = file_path.relative_to(LIBRARY_DIR).as_posix()
                        _rescan_cbz_file(conn, file_path, file_rel)
                        rescanned_count += 1
                        app_logger.info(f"Rescanned: {file_rel}")
                    except Exception as e:
                        # Best effort: one bad file must not abort the folder.
                        app_logger.error(f"Failed to rescan {file_path}: {e}")
                        continue
            app_logger.warning(f"Rescanned folder: {rel_posix} ({rescanned_count} files)")

        conn.commit()
        return {"success": True, "rescanned_count": rescanned_count}
    except Exception as e:
        app_logger.error(f"Rescan error for {rel_path}: {e}")
        if conn is not None:
            try:
                conn.rollback()
            except Exception:
                pass
        return {"success": False, "error": str(e)}
    finally:
        if conn is not None:
            try:
                conn.close()
            except Exception:
                pass
        # Resume the watcher if it was running
        if watcher_was_running and ENABLE_WATCH:
            app_logger.warning("Resuming filesystem watcher after selective rescan")
            if _LIBRARY_WATCHER:
                _LIBRARY_WATCHER.start()
            else:
                _LIBRARY_WATCHER = watcher.LibraryWatcher(LIBRARY_DIR)
                _LIBRARY_WATCHER.start()
@app.get("/debug/fts")
def debug_fts(_=Depends(require_basic)):
    """Report whether the database layer says FTS5 is available."""
    fts_available = db.has_fts5()
    return {"fts5": fts_available}
@app.get("/debug/meta-raw")
def debug_meta_raw(path: str, _=Depends(require_basic)):
    """Return the raw `meta` table row for `path`, or a 404 JSON error if absent."""
    conn = db.connect()
    try:
        cursor = conn.execute("SELECT * FROM meta WHERE rel=?", (path,))
        row = cursor.fetchone()
        if row:
            # Copy every column of the row into a plain dict for serialisation.
            payload = {column: row[column] for column in row.keys()}
            return JSONResponse(payload)
        return JSONResponse({"error": "No metadata found"}, status_code=404)
    finally:
        conn.close()
@app.get("/debug/comic-by-path")
def debug_comic_by_path(path: str, _=Depends(require_basic)):
    """
    Look up one comic by its relative path given as a query parameter.

    Returns the stored file info plus every non-empty metadata field, or a 404
    payload listing up to 10 paths whose name contains the requested filename.
    """
    app_logger.error(f"[DEBUG] /debug/comic-by-path called with: {path}")
    conn = db.connect()
    try:
        row = db.get_item(conn, path)

        if not row:
            # Suggest near matches based on the last path segment.
            filename = path.rsplit("/", 1)[-1]
            similar = conn.execute(
                "SELECT rel FROM items WHERE rel LIKE ? AND is_dir=0 LIMIT 10",
                (f"%{filename}%",)
            ).fetchall()

            app_logger.error(f"[DEBUG] Comic not found: {path}")
            if similar:
                app_logger.error(f"[DEBUG] Similar paths found:")
                for candidate in similar:
                    app_logger.error(f"[DEBUG] - {candidate['rel']}")

            not_found = {
                "error": "Comic not found in database",
                "searched_path": path,
                "similar_paths": [candidate["rel"] for candidate in similar],
            }
            return JSONResponse(not_found, status_code=404)

        file_info = {
            column: row[column]
            for column in ("rel", "name", "parent", "is_dir", "size", "mtime", "ext")
        }
        file_info["added_at"] = rget(row, "added_at")

        metadata = {}
        for field in (
            "title", "series", "number", "volume", "year", "month", "day",
            "writer", "publisher", "summary", "genre", "tags", "characters",
            "teams", "locations", "comicvineissue", "format",
        ):
            try:
                value = row[field]
            except (KeyError, IndexError):
                # Column not present on this row; leave it out.
                continue
            if value is None or value == "":
                continue
            metadata[field] = value

        result = {"file_info": file_info, "metadata": metadata}
        app_logger.error(f"[DEBUG] Found comic, format field: {result['metadata'].get('format', 'NOT SET')}")
        return JSONResponse(result)
    finally:
        conn.close()
@app.get("/debug/list-comics")
def debug_list_comics(limit: int = 20, search: str = None, _=Depends(require_basic)):
    """
    List non-directory items with their exact `rel` path, name and `format`.

    limit:  maximum number of rows returned
    search: optional substring; when given, only `rel` values containing it match
    """
    conn = db.connect()
    try:
        if not search:
            cursor = conn.execute(
                """SELECT i.rel, i.name, m.format
                FROM items i
                LEFT JOIN meta m ON i.rel = m.rel
                WHERE i.is_dir=0
                ORDER BY i.rel
                LIMIT ?""",
                (limit,)
            )
        else:
            cursor = conn.execute(
                """SELECT i.rel, i.name, m.format
                FROM items i
                LEFT JOIN meta m ON i.rel = m.rel
                WHERE i.is_dir=0 AND i.rel LIKE ?
                ORDER BY i.rel
                LIMIT ?""",
                (f"%{search}%", limit)
            )
        rows = cursor.fetchall()

        app_logger.error(f"[DEBUG] /debug/list-comics found {len(rows)} comics")
        # Only the first five rows are echoed to the log.
        for r in rows[:5]:
            app_logger.error(f"[DEBUG] - {r['rel']} (format: {rget(r, 'format', 'NULL')})")

        comics = []
        for r in rows:
            comics.append({"rel": r["rel"], "name": r["name"], "format": rget(r, "format")})

        return JSONResponse({
            "count": len(rows),
            "limit": limit,
            "search": search,
            "comics": comics,
        })
    finally:
        conn.close()
@app.get("/debug/comic/{path:path}")
def debug_comic(path: str, _=Depends(require_basic)):
    """
    Debug endpoint to see what's stored in the database for a specific comic.

    path: relative path of the comic, taken from the URL path.

    Returns the stored file info plus every non-empty metadata field. When the
    comic is not found, returns a 404 payload listing up to 5 paths whose name
    contains the requested filename.
    """
    from urllib.parse import unquote

    # FastAPI already decodes the path, but a double-encoded URL would still
    # carry escapes, so the unquoted form is tried as a second candidate.
    decoded_path = unquote(path)
    paths_to_try = [path] if decoded_path == path else [path, decoded_path]

    app_logger.error(f"[DEBUG] Looking up comic in database:")
    app_logger.error(f"[DEBUG] Path from FastAPI: {path}")
    app_logger.error(f"[DEBUG] Trying paths: {paths_to_try}")

    conn = db.connect()
    try:
        row = None
        for try_path in paths_to_try:
            row = db.get_item(conn, try_path)
            if row:
                app_logger.error(f"[DEBUG] Found comic using path: {try_path}")
                break
            app_logger.error(f"[DEBUG] Not found with path: {try_path}")

        if not row:
            # Try to find similar paths based on the last path segment
            filename = decoded_path.rsplit("/", 1)[-1]
            similar = conn.execute(
                "SELECT rel FROM items WHERE rel LIKE ? AND is_dir=0 LIMIT 5",
                (f"%{filename}%",)
            ).fetchall()

            app_logger.error(f"[DEBUG] Comic not found in database")
            if similar:
                app_logger.error(f"[DEBUG] Similar paths found:")
                for s in similar:
                    app_logger.error(f"[DEBUG] - {s['rel']}")

            return JSONResponse({
                "error": "Comic not found in database",
                "searched_path": decoded_path,
                "similar_paths": [s["rel"] for s in similar]
            }, status_code=404)

        # Convert row to dict
        result = {
            "file_info": {
                "rel": row["rel"],
                "name": row["name"],
                "parent": row["parent"],
                "is_dir": row["is_dir"],
                "size": row["size"],
                "mtime": row["mtime"],
                "ext": row["ext"],
                "added_at": rget(row, "added_at")
            },
            "metadata": {}
        }

        # Get all metadata fields
        meta_fields = (
            "title", "series", "number", "volume", "year", "month", "day",
            "writer", "publisher", "summary", "genre", "tags", "characters",
            "teams", "locations", "comicvineissue", "format",
        )
        for field in meta_fields:
            try:
                value = row[field]
            except (KeyError, IndexError):
                # Column not present on this row; leave it out.
                continue
            if value is not None and value != "":
                result["metadata"][field] = value

        return JSONResponse(result)
    finally:
        conn.close()
@app.on_event("startup")
def startup():
if not LIBRARY_DIR.exists():
@@ -1253,6 +1656,40 @@ def admin_reindex(_=Depends(require_basic)):
_start_scan(force=True)
return JSONResponse({"ok": True, "started": True})
@app.post("/admin/rescan", response_class=JSONResponse)
async def admin_rescan_path(request: Request, _=Depends(require_basic)):
    """
    Rescan a specific file or folder.

    Request body: {"path": "relative/path/from/library"}

    Responses:
      200 - result of _rescan_path with an added "ok" flag mirroring "success"
      400 - body is not a JSON object or "path" is missing/empty
      408 - the rescan did not finish within 30 seconds (it keeps running)
      500 - unexpected error
    """
    import asyncio

    try:
        try:
            body = await request.json()
        except ValueError:
            # Malformed JSON (JSONDecodeError / UnicodeDecodeError).
            return JSONResponse({"ok": False, "error": "Request body must be valid JSON"}, status_code=400)

        raw_path = body.get("path", "") if isinstance(body, dict) else ""
        rel_path = raw_path.strip().strip("/") if isinstance(raw_path, str) else ""

        if not rel_path:
            return JSONResponse({"ok": False, "error": "Path is required"}, status_code=400)

        # Run the blocking rescan on a worker thread and *await* it, so the
        # event loop keeps serving other requests while we wait.
        loop = asyncio.get_running_loop()
        future = loop.run_in_executor(None, _rescan_path, rel_path)
        done, _pending = await asyncio.wait({future}, timeout=30)  # Wait up to 30 seconds

        if not done:
            # The worker is not cancelled; it finishes in the background.
            return JSONResponse({"ok": False, "error": "Rescan timed out"}, status_code=408)

        result = future.result()
        result["ok"] = result.get("success", False)
        return JSONResponse(result)
    except Exception as e:
        app_logger.error(f"Rescan endpoint error: {e}")
        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
@app.post("/admin/thumbs/precache", response_class=JSONResponse)
def admin_thumbs_precache(_=Depends(require_basic)):
if _THUMB_STATUS["running"]:
+79
View File
@@ -94,6 +94,33 @@
</div>
</div>
<!-- Selective Rescan (Collapsible): collapsed-by-default accordion holding a
     path input (#rescanPath), a trigger button (#rescanBtn) and a result area
     (#rescanResult) that stays hidden via d-none until there is a message. -->
<div class="accordion mb-3" id="rescanAccordion">
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#rescanCollapse" aria-expanded="false" aria-controls="rescanCollapse">
<i class="bi bi-arrow-clockwise me-2"></i>Selective Rescan
</button>
</h2>
<div id="rescanCollapse" class="accordion-collapse collapse" data-bs-parent="#rescanAccordion">
<div class="accordion-body">
<p class="small text-secondary mb-2">
Rescan a specific file or folder without reindexing the entire library. Useful for fixing metadata issues.
</p>
<div class="input-group">
<input type="text" class="form-control" id="rescanPath"
placeholder="e.g., folder/subfolder or folder/comic.cbz"
aria-label="Path to rescan">
<button class="btn btn-primary" type="button" id="rescanBtn">
<i class="bi bi-arrow-clockwise me-1"></i> Rescan Path
</button>
</div>
<div id="rescanResult" class="mt-2 d-none"></div>
</div>
</div>
</div>
</div>
<!-- KPIs -->
<div class="row g-3 kpis">
<div class="col-12 col-md-6 col-xl-3">
@@ -387,6 +414,58 @@
}
});
// Selective rescan: POST the entered path to /admin/rescan and show the outcome.
document.getElementById("rescanBtn").addEventListener("click", async () => {
  const btn = document.getElementById("rescanBtn");
  const input = document.getElementById("rescanPath");
  const resultDiv = document.getElementById("rescanResult");
  const path = input.value.trim();

  // Render an alert with an optional Bootstrap icon. The message is inserted
  // as a text node so server- or user-supplied text is never parsed as HTML.
  const showResult = (kind, iconClass, message) => {
    resultDiv.className = "mt-2 alert alert-" + kind; // also drops d-none
    resultDiv.textContent = "";
    if (iconClass) {
      const icon = document.createElement("i");
      icon.className = "bi " + iconClass + " me-2";
      resultDiv.appendChild(icon);
    }
    resultDiv.appendChild(document.createTextNode(message));
  };

  if (!path) {
    showResult("warning", null, "Please enter a path to rescan");
    return;
  }

  const original = btn.innerHTML;
  try {
    btn.disabled = true;
    input.disabled = true;
    btn.innerHTML = '<span class="spinner-border spinner-border-sm me-1"></span> Rescanning…';
    showResult("info", null, "Rescanning " + path + "...");

    const response = await fetch("/admin/rescan", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      credentials: "include",
      body: JSON.stringify({ path: path })
    });

    // A non-JSON reply (e.g. an auth or proxy error page) is reported with
    // its HTTP status instead of an opaque JSON parse error.
    let result;
    try {
      result = await response.json();
    } catch (parseError) {
      result = { ok: false, error: "Unexpected response (HTTP " + response.status + ")" };
    }

    if (result.ok) {
      showResult("success", "bi-check-circle", `Successfully rescanned ${result.rescanned_count || 0} file(s)`);
      input.value = "";
      // Refresh stats after rescan
      setTimeout(load, 500);
    } else {
      showResult("danger", "bi-exclamation-triangle", `Rescan failed: ${result.error || "Unknown error"}`);
    }
  } catch (e) {
    showResult("danger", "bi-exclamation-triangle", `Error: ${e?.message || e}`);
  } finally {
    btn.disabled = false;
    input.disabled = false;
    btn.innerHTML = original;
  }
});
// NEW: Clean page cache
async function updateCacheStatus() {
try{