Files
CBZGenerator/generate_cbz.py
2025-09-11 22:23:36 +02:00

373 lines
14 KiB
Python

#!/usr/bin/env python3
import argparse
import io
import json
import random
import zipfile
import re
from datetime import datetime
from pathlib import Path
try:
from PIL import Image, ImageDraw, ImageFont
except ImportError:
raise SystemExit("Please install Pillow first: pip install pillow")
# English month names in calendar order. Month numbers used elsewhere in this
# file are 1-based, i.e. MONTHS[month - 1] is the name of `month`.
MONTHS = [
"January","February","March","April","May","June",
"July","August","September","October","November","December"
]
# Latest year an issue may be dated. The start-date pickers, the generation
# loops and generate_issue_cbz all compare against this value.
MAX_YEAR = 2025 # <- hard cap
# Pool of format labels; one is picked at random per series. The picked label is
# used verbatim as a folder name and as the <Format> value, while a normalized
# copy (see normalize_format) decides how many issues are generated.
# NOTE(review): "Balck & White" and "Director'Cut" sit next to their correctly
# spelled forms - presumably deliberate dirty data for the stress test
# (normalize_format explicitly maps "director'cut"); confirm before "fixing".
FORMAT_OPTIONS = [
"Main Series",
"Limited Series",
"One-Shot",
"TPB",
"Annual",
"Preview",
"Balck & White",
"Black & White",
"Director'Cut",
"Director's Cut",
"Graphic Novel"
]
# Lower-cased spelling variants mapped to their canonical format label.
_FORMAT_ALIASES = {
    "main series": "Main Series",
    "limited series": "Limited Series",
    "limited": "Limited Series",
    "one-shot": "One-Shot",
    "oneshot": "One-Shot",
    "tpb": "TPB",
    "trade": "TPB",
    "trade paperback": "TPB",
    "annual": "Annual",
    "director's cut": "Director's Cut",
    "director'cut": "Director's Cut",
    "directors cut": "Director's Cut",
}


def normalize_format(fmt: str) -> str:
    """Return the canonical label for a format name.

    Matching ignores case and surrounding whitespace. A label with no known
    alias is returned exactly as it was passed in (not stripped, not re-cased).
    """
    return _FORMAT_ALIASES.get(fmt.strip().lower(), fmt)
def load_data(json_path: Path):
    """Load publishers, their writers/characters and title words from a JSON file.

    The file must contain a JSON object with a non-empty ``publishers`` list.
    Each usable entry is an object with a non-empty ``name``; entries that are
    not objects or have no name are skipped. ``writers`` and ``characters`` are
    optional per publisher. ``works`` is an optional top-level list of words
    used to build series titles; a built-in word list is used when it is
    missing or empty.

    Returns:
        ``(publishers, writers_by_pub, chars_by_pub, works)`` where
        ``publishers`` is a list of names and the two dicts map a publisher
        name to a list.

    Raises:
        ValueError: the top-level value is not an object, ``publishers`` is
            missing/empty/not a list, or no entry has a name.
        OSError, json.JSONDecodeError: propagated from reading/parsing the file.
    """
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    # Without this guard a top-level list/str/number fails later with an
    # unhelpful AttributeError on `.get`.
    if not isinstance(data, dict):
        raise ValueError("Top-level JSON value must be an object.")

    pubs_raw = data.get("publishers") or []
    if not pubs_raw or not isinstance(pubs_raw, list):
        raise ValueError("`publishers` must be a list of objects.")

    def as_list(value):
        # list("Jane Doe") would explode a lone string into single characters,
        # so treat a bare string as a one-element list instead.
        if isinstance(value, str):
            return [value]
        return list(value or [])

    publishers, writers_by_pub, chars_by_pub = [], {}, {}
    for p in pubs_raw:
        if not isinstance(p, dict):
            continue
        name = p.get("name")
        if not name:
            continue
        publishers.append(name)
        writers_by_pub[name] = as_list(p.get("writers"))
        chars_by_pub[name] = as_list(p.get("characters"))
    if not publishers:
        raise ValueError("No valid publishers with names found.")

    works = as_list(data.get("works"))
    if not works:
        works = [
            "Odyssey", "Legacy", "Eclipse", "Frontier", "Spectrum", "Monolith", "Harbinger",
            "Chronicle", "Vanguard", "Paradox", "Catalyst", "Requiem", "Arcadia", "Equinox",
            "Ironclad", "Apex", "Arc", "Vector", "Nimbus", "Cinder",
        ]
    return publishers, writers_by_pub, chars_by_pub, works
def slugify(text: str):
    """Reduce ``text`` to characters that are safe in a path component.

    Keeps alphanumerics plus ``-_.()[]#,`` and space, drops everything else,
    then trims surrounding whitespace. If nothing but dots (or nothing at all)
    is left, ``"_"`` is returned: an empty component would silently remove a
    directory level and ``"."``/``".."`` would resolve to the current/parent
    directory instead of creating a folder.
    """
    keep = "-_.()[]#, "
    cleaned = "".join(c for c in text if c.isalnum() or c in keep).strip()
    if not cleaned.strip("."):
        return "_"
    return cleaned
def rand_series_title(works):
    """Build a random series title from the word list ``works``.

    One word is drawn first, then a title template. Templates that need a
    second word draw it from the remaining words, falling back to the full
    list when no other word exists (so the same word may appear twice).
    """
    templates = [
        "{w1}", "The {w1}", "{w1} Chronicle", "{w1}: Genesis", "{w1} Rising", "{w1} Reborn",
        "{w1} & {w2}", "{w1} of {w2}", "{w1}: {w2}",
    ]
    first = random.choice(works)
    template = random.choice(templates)
    if "{w2}" not in template:
        return template.format(w1=first)
    others = [word for word in works if word != first]
    second = random.choice(others or works)
    return template.format(w1=first, w2=second)
def choose_writer(publisher, writers_by_pub):
    """Pick a random writer for ``publisher``.

    Uses the publisher's own list from ``writers_by_pub`` when it has entries;
    otherwise picks from a built-in list of placeholder names.
    """
    placeholder_names = [
        "Alex Grant", "Taylor Miller", "Jordan Bishop", "Morgan Reeves", "Riley Carter",
        "Sam Hayes", "Casey Harper", "Jamie Brooks", "Avery Collins", "Quinn Rowe",
    ]
    roster = writers_by_pub.get(publisher)
    return random.choice(roster if roster else placeholder_names)
def choose_character(publisher, chars_by_pub):
    """Pick a random character for ``publisher``.

    Uses the publisher's own list from ``chars_by_pub`` when it has entries;
    otherwise picks from a built-in list of placeholder characters.
    """
    placeholder_characters = [
        "Sentinel", "Nightglass", "Starflare", "Iron Warden", "Moonstrike", "Volt Runner", "Red Quill",
    ]
    roster = chars_by_pub.get(publisher)
    return random.choice(roster if roster else placeholder_characters)
def add_months(year: int, month: int, delta: int):
    """Return the ``(year, month)`` that lies ``delta`` months after the given one.

    ``month`` is 1-based on input and output; year boundaries are carried.
    """
    # Work on a zero-based running month count so divmod handles the carry.
    running = year * 12 + (month - 1) + delta
    result_year, month_index = divmod(running, 12)
    return result_year, month_index + 1
def sub_months(year: int, month: int, delta: int):
    """Return the ``(year, month)`` that lies ``delta`` months before the given one.

    ``month`` is 1-based on input and output; year boundaries are borrowed.
    """
    # Work on a zero-based running month count so divmod handles the borrow.
    running = year * 12 + (month - 1) - delta
    result_year, month_index = divmod(running, 12)
    return result_year, month_index + 1
def rand_start_date_for_monthly(n_issues: int, year_min: int = 1960):
    """Pick a random ``(year, month)`` for issue #1 of a monthly series.

    The start is chosen so that the last of ``n_issues`` monthly issues is
    dated no later than December of MAX_YEAR, and no earlier than January of
    ``year_min``. If the run cannot fit, January of ``year_min`` is returned.
    """
    months_after_first = max(0, n_issues - 1)
    last_year, last_month = sub_months(MAX_YEAR, 12, months_after_first)

    # Compare and draw on a zero-based running month count.
    earliest = year_min * 12
    latest = last_year * 12 + (last_month - 1)
    if latest < earliest:
        return year_min, 1

    chosen = random.randint(earliest, latest)
    start_year, month_index = divmod(chosen, 12)
    return start_year, month_index + 1
def rand_start_date_for_annuals(n_issues: int, year_min: int = 1960):
    """Pick a random ``(year, month)`` for issue #1 of a yearly series.

    The year is chosen so that ``n_issues`` consecutive years end no later than
    MAX_YEAR; if that is impossible the year is ``year_min``. The month is any
    of 1-12.
    """
    years_after_first = max(0, n_issues - 1)
    latest_start_year = max(year_min, MAX_YEAR - years_after_first)
    start_year = random.randint(year_min, latest_start_year)
    start_month = random.randint(1, 12)
    return start_year, start_month
def zero_pad_page(n: int) -> str:
    """Return the archive member name for page ``n``, e.g. ``P00001.jpg``."""
    return "P{:05d}.jpg".format(n)
def make_jpeg_bytes(text: str, width=1200, height=1800):
    """Render ``text`` centered in black on a white page and return JPEG bytes.

    ``text`` may contain newlines; each line is centered horizontally and the
    block of lines is centered vertically with a fixed gap between lines.
    DejaVuSans at size 64 is used when it can be loaded, otherwise Pillow's
    default font.
    """
    line_gap = 20
    canvas = Image.new("RGB", (width, height), color="white")
    pen = ImageDraw.Draw(canvas)
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", size=64)
    except Exception:
        font = ImageFont.load_default()

    # Measure every line first so the whole block can be centered vertically.
    rows = text.split("\n")
    extents = []
    for row in rows:
        left, top, right, bottom = pen.textbbox((0, 0), row, font=font)
        extents.append((right - left, bottom - top))

    block_height = sum(row_h for _, row_h in extents) + (len(rows) - 1) * line_gap
    cursor_y = (height - block_height) // 2
    for row, (row_w, row_h) in zip(rows, extents):
        pen.text(((width - row_w) // 2, cursor_y), row, fill="black", font=font)
        cursor_y += row_h + line_gap

    encoded = io.BytesIO()
    canvas.save(encoded, format="JPEG", quality=90)
    return encoded.getvalue()
# Translation table for the five characters that must be escaped in XML text.
_XML_ENTITIES = str.maketrans({
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
})


def escape_xml(s: str) -> str:
    """Return ``s`` with ``& < > " '`` replaced by their XML entities."""
    # A single pass per character, so an "&" produced by one replacement is
    # never escaped a second time.
    return s.translate(_XML_ENTITIES)
def build_comicinfo_xml(series, number, title, volume_year, year, month,
                        publisher, writer, characters, fmt, page_count):
    """Return the ComicInfo.xml document for one issue as a string.

    Text fields (title, series, publisher, writer, characters, format) are
    XML-escaped; number, volume, year, month and page count are inserted as
    given. ``characters`` may be a list/tuple of names, joined with ", ", or a
    single name. Language and summary are fixed values.
    """
    if isinstance(characters, (list, tuple)):
        names = characters
    else:
        names = [characters]
    chars_joined = ", ".join(names)

    # (tag, already-safe value) pairs in the order they appear in the document.
    fields = [
        ("Title", escape_xml(title)),
        ("Series", escape_xml(series)),
        ("Number", number),
        ("Volume", volume_year),
        ("Year", year),
        ("Month", month),
        ("Publisher", escape_xml(publisher)),
        ("Writer", escape_xml(writer)),
        ("Characters", escape_xml(chars_joined)),
        ("Format", escape_xml(fmt)),
        ("LanguageISO", "en"),
        ("PageCount", page_count),
        ("Summary", "Generated for application stress testing."),
    ]
    lines = ['<?xml version="1.0" encoding="utf-8"?>', "<ComicInfo>"]
    lines.extend(f"<{tag}>{value}</{tag}>" for tag, value in fields)
    lines.append("</ComicInfo>")
    # The document ends with a newline after the closing tag.
    return "\n".join(lines) + "\n"
def make_filename(series, issue_no, month_name, year):
    """Return the archive file name ``<series> #NNN [<Month>, <year>].cbz``.

    The issue number is zero-padded to at least three digits. ``series`` is
    used as given - no characters are removed or replaced here.
    """
    return "{} #{:03d} [{}, {}].cbz".format(series, issue_no, month_name, year)
def issues_for_format(fmt_norm: str) -> int:
    """Return how many issues a series of the given normalized format gets.

    Formats with a range get a random count between 1 and their upper bound;
    "One-Shot" and any format not listed always get exactly 1 (without
    consuming a random number).
    """
    upper_bounds = {
        "Main Series": 500,
        "Limited Series": 15,
        "TPB": 10,
        "Director's Cut": 5,
        "Annual": 5,
    }
    upper = upper_bounds.get(fmt_norm)
    if upper is None:
        return 1
    return random.randint(1, upper)
# ---------- continue existing volumes ----------
def series_target_dir(base_out: Path, publisher: str, character: str, fmt_display: str,
                      volume_year: int, series: str) -> Path:
    """Return the folder that holds one volume of a series.

    Layout: ``base_out / publisher / character / format / "(year) series"``,
    with every component passed through ``slugify``. The folder is not created.
    """
    components = (publisher, character, fmt_display, f"({volume_year}) {series}")
    folder = base_out
    for component in components:
        folder = folder / slugify(component)
    return folder
def scan_existing_issue_info(target_dir: Path, series: str):
    """Inspect ``target_dir`` for already generated issues of ``series``.

    Only files named ``<series> #NNN [<Month>, <Year>].cbz`` are considered.
    The issue number may have three or more digits: the writer pads to a
    *minimum* of three, so a four-digit issue must still be recognised.

    Returns:
        ``(existing_max_issue_no, first_issue_year, first_issue_month)``.
        The maximum is 0 when the folder is missing or holds no matching file.
        Year and month are taken from issue #1 when it exists and its month
        name is one of MONTHS; otherwise both are ``None``.
    """
    if not target_dir.exists():
        return 0, None, None

    pattern = re.compile(
        rf"^{re.escape(series)} #(\d{{3,}}) \[([A-Za-z]+), (\d{{4}})\]$"
    )
    max_no = 0
    first_year = None
    first_month = None
    for cbz in target_dir.glob("*.cbz"):
        match = pattern.match(cbz.stem)
        if not match:
            continue
        issue_no = int(match.group(1))
        month_name = match.group(2)
        max_no = max(max_no, issue_no)
        # Only issue #1 anchors the series' start date.
        if issue_no == 1 and month_name in MONTHS:
            first_month = MONTHS.index(month_name) + 1
            first_year = int(match.group(3))
    return max_no, first_year, first_month
# ---------- core generation ----------
def generate_issue_cbz(base_out: Path, publisher: str, character: str, fmt_display: str,
                       series: str, issue_no: int, writer: str,
                       volume_year: int, issue_year: int, issue_month: int, page_count: int):
    """Write one issue as a ``.cbz`` archive and return its path.

    The archive is placed in the series folder (created if needed) and holds
    ``page_count`` generated JPEG pages followed by ``ComicInfo.xml``. Page 1
    shows the archive file name, later pages show "Page N".

    Returns ``None`` without touching the disk when ``issue_year`` is later
    than MAX_YEAR.
    """
    # Last line of defence for the year cap; callers are expected to check too.
    if issue_year > MAX_YEAR:
        return None

    month_name = MONTHS[issue_month - 1]
    out_dir = series_target_dir(base_out, publisher, character, fmt_display, volume_year, series)
    out_dir.mkdir(parents=True, exist_ok=True)

    archive_name = make_filename(series, issue_no, month_name, issue_year)
    archive_path = out_dir / archive_name

    # Build the metadata before opening the archive so a failure here leaves
    # no file behind.
    metadata = build_comicinfo_xml(
        series=series,
        number=issue_no,
        title=f"{series} #{issue_no}",
        volume_year=volume_year,
        year=issue_year,
        month=issue_month,
        publisher=publisher,
        writer=writer,
        characters=[character],
        fmt=fmt_display,
        page_count=page_count,
    )

    with zipfile.ZipFile(archive_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        for page_no in range(1, page_count + 1):
            caption = archive_name if page_no == 1 else f"Page {page_no}"
            archive.writestr(zero_pad_page(page_no), make_jpeg_bytes(caption))
        archive.writestr("ComicInfo.xml", metadata)
    return archive_path
def estimate_total_issues(series_count: int):
    """Return a rough estimate of how many issues ``series_count`` series yield.

    For each series a format is drawn at random and its issue count is drawn
    the same way generation does. This is only an estimate: the real run makes
    its own draws, and the year cap or already existing issues can reduce the
    actual total.
    """
    return sum(
        issues_for_format(normalize_format(random.choice(FORMAT_OPTIONS)))
        for _ in range(series_count)
    )
def generate_one_series(base_out: Path, publishers, writers_by_pub, chars_by_pub, works,
                        counter, total_issues):
    """Generate every issue of one randomly configured series.

    Publisher, character, format, writer, title and issue count are drawn at
    random; with probability 0.35 the title is prefixed with the character
    name. "Annual" series advance one year per issue with a fixed month, all
    other formats advance one month per issue. Generation stops at the first
    issue that would be dated after MAX_YEAR.

    If the target folder already contains issues of this series, numbering
    continues after the highest existing issue, and the date of the existing
    issue #1 (when found) replaces the randomly chosen start date.

    ``counter`` is a one-element list used as a running total across calls; it
    is incremented for each archive written and a progress line is printed at
    every multiple of 100. ``total_issues`` only appears in that message.
    """
    publisher = random.choice(publishers)
    character = choose_character(publisher, chars_by_pub)
    fmt_display = random.choice(FORMAT_OPTIONS)
    fmt_norm = normalize_format(fmt_display)
    writer = choose_writer(publisher, writers_by_pub)
    series = rand_series_title(works)
    if random.random() < 0.35:
        series = f"{character}: {series}"

    n_issues = issues_for_format(fmt_norm)
    is_annual = fmt_norm == "Annual"

    # Start date is picked so the whole run fits under the year cap.
    pick_start = rand_start_date_for_annuals if is_annual else rand_start_date_for_monthly
    start_year, start_month = pick_start(n_issues)
    volume_year = start_year  # the volume is named after the year of #1

    # Continue an existing volume if one is already on disk.
    existing_dir = series_target_dir(base_out, publisher, character, fmt_display,
                                     volume_year, series)
    existing_max, first_y, first_m = scan_existing_issue_info(existing_dir, series)
    if first_y and first_m:
        start_year, start_month = first_y, first_m
        volume_year = first_y

    start_issue = max(1, existing_max + 1)
    if start_issue > n_issues:
        return  # every planned issue already exists

    for issue_no in range(start_issue, n_issues + 1):
        offset = issue_no - 1
        if is_annual:
            issue_year, issue_month = start_year + offset, start_month
        else:
            issue_year, issue_month = add_months(start_year, start_month, offset)
        if issue_year > MAX_YEAR:
            break

        page_count = random.randint(5, 10)
        written = generate_issue_cbz(base_out, publisher, character, fmt_display, series,
                                     issue_no, writer, volume_year, issue_year, issue_month,
                                     page_count)
        if written:
            counter[0] += 1
            if counter[0] % 100 == 0:
                print(f"Generated {counter[0]} issues out of {total_issues}")
def main():
    """Command-line entry point: parse arguments and generate the series.

    Arguments: ``count`` (number of series), ``--out`` (output directory,
    created if missing), ``--data`` (comic data JSON) and ``--seed``.
    """
    cli = argparse.ArgumentParser(description="Generate CBZ files for stress testing.")
    cli.add_argument("count", type=int, help="Number of series to generate")
    cli.add_argument("--out", type=Path, default=Path("output_cbz"), help="Output base directory")
    cli.add_argument("--data", type=Path, default=Path("./comicdata.json"),
                     help="Path to comic data JSON (default: ./comicdata.json)")
    cli.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility")
    args = cli.parse_args()

    def reseed():
        # No-op when no seed was given.
        if args.seed is not None:
            random.seed(args.seed)

    reseed()
    publishers, writers_by_pub, chars_by_pub, works = load_data(args.data)
    args.out.mkdir(parents=True, exist_ok=True)

    total_issues_est = estimate_total_issues(args.count)  # rough
    # The estimate consumed random numbers; seed again so generation starts
    # from the seed's initial state.
    reseed()

    counter = [0]
    for _ in range(args.count):
        generate_one_series(args.out, publishers, writers_by_pub, chars_by_pub, works,
                            counter, total_issues_est)
    print(f"Done. Generated {counter[0]} issues total (estimated {total_issues_est}).")


if __name__ == "__main__":
    main()