373 lines
14 KiB
Python
373 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
import argparse
|
|
import io
|
|
import json
|
|
import random
|
|
import zipfile
|
|
import re
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
try:
|
|
from PIL import Image, ImageDraw, ImageFont
|
|
except ImportError:
|
|
raise SystemExit("Please install Pillow first: pip install pillow")
|
|
|
|
# English month names in calendar order. Index 0 is January, so a 1-based
# month number m corresponds to MONTHS[m - 1].
MONTHS = [
    "January","February","March","April","May","June",
    "July","August","September","October","November","December"
]
|
|
|
|
MAX_YEAR = 2025  # hard cap: latest calendar year an issue may be dated
|
|
|
|
# Pool of format labels that a series' format is drawn from at random.
# NOTE(review): "Balck & White" and "Director'Cut" look like misspelled
# duplicates of the entries that follow them; presumably deliberate dirty data
# for stress testing -- confirm before removing.
FORMAT_OPTIONS = [
    "Main Series",
    "Limited Series",
    "One-Shot",
    "TPB",
    "Annual",
    "Preview",
    "Balck & White",
    "Black & White",
    "Director'Cut",
    "Director's Cut",
    "Graphic Novel"
]
|
|
|
|
def normalize_format(fmt: str) -> str:
    """Map a format label, or one of its known aliases, to a canonical label.

    The lookup key is the label stripped of surrounding whitespace and
    lower-cased. A label with no known alias is returned exactly as it was
    passed in (it is not stripped or re-cased).
    """
    canonical_by_alias = {
        "main series": "Main Series",
        "limited series": "Limited Series",
        "limited": "Limited Series",
        "one-shot": "One-Shot",
        "oneshot": "One-Shot",
        "tpb": "TPB",
        "trade": "TPB",
        "trade paperback": "TPB",
        "annual": "Annual",
        "director's cut": "Director's Cut",
        "director'cut": "Director's Cut",
        "directors cut": "Director's Cut",
    }
    return canonical_by_alias.get(fmt.strip().lower(), fmt)
|
|
|
|
def load_data(json_path: Path):
    """Read the comic data JSON file and return the pools used for generation.

    The file must contain a JSON object with a non-empty ``publishers`` list.
    Each publisher entry is an object with a ``name`` and optional ``writers``
    and ``characters`` lists; entries that are not objects or have no name are
    skipped. An optional ``works`` list supplies title words; a built-in list
    is used when it is missing or empty.

    Returns:
        A tuple ``(publishers, writers_by_pub, chars_by_pub, works)`` where
        ``publishers`` is a list of names and the two mappings are keyed by
        publisher name.

    Raises:
        ValueError: if the top-level JSON value is not an object, if
            ``publishers`` is missing, empty or not a list, or if no entry
            has a usable name.
    """
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # A top-level array or scalar has no .get(); report it as a schema error
    # instead of letting an AttributeError escape.
    if not isinstance(data, dict):
        raise ValueError("Top-level JSON value must be an object.")

    pubs_raw = data.get("publishers") or []
    if not pubs_raw or not isinstance(pubs_raw, list):
        raise ValueError("`publishers` must be a list of objects.")

    publishers, writers_by_pub, chars_by_pub = [], {}, {}
    for p in pubs_raw:
        if not isinstance(p, dict):
            continue
        name = p.get("name")
        if not name:
            continue
        publishers.append(name)
        # `or []` turns a missing or null field into an empty pool.
        writers_by_pub[name] = list(p.get("writers") or [])
        chars_by_pub[name] = list(p.get("characters") or [])

    if not publishers:
        raise ValueError("No valid publishers with names found.")

    works = data.get("works") or []
    if not works:
        works = [
            "Odyssey","Legacy","Eclipse","Frontier","Spectrum","Monolith","Harbinger",
            "Chronicle","Vanguard","Paradox","Catalyst","Requiem","Arcadia","Equinox",
            "Ironclad","Apex","Arc","Vector","Nimbus","Cinder"
        ]
    return publishers, writers_by_pub, chars_by_pub, works
|
|
|
|
def slugify(text: str):
    """Return *text* reduced to characters that are safe for path components.

    Alphanumeric characters and the punctuation ``-_.()[]#,`` plus the space
    are kept; everything else is dropped. Surrounding whitespace is trimmed
    from the result.
    """
    allowed = set("-_.()[]#, ")
    kept = [ch for ch in text if ch.isalnum() or ch in allowed]
    return "".join(kept).strip()
|
|
|
|
def rand_series_title(works):
    """Build a random series title from the word pool *works*.

    One word is drawn first, then a title template. Templates that need a
    second word draw it from the remaining words, falling back to the full
    pool when no other word exists.
    """
    first = random.choice(works)
    templates = [
        "{w1}","The {w1}","{w1} Chronicle","{w1}: Genesis","{w1} Rising","{w1} Reborn",
        "{w1} & {w2}","{w1} of {w2}","{w1}: {w2}",
    ]
    template = random.choice(templates)
    if "{w2}" not in template:
        return template.format(w1=first)

    # Prefer a second word different from the first when the pool allows it.
    others = [word for word in works if word != first]
    second = random.choice(others if others else works)
    return template.format(w1=first, w2=second)
|
|
|
|
def choose_writer(publisher, writers_by_pub):
    """Pick a random writer for *publisher*.

    Uses the publisher's own writer list when it is non-empty; otherwise
    picks from a built-in list of placeholder names.
    """
    candidates = writers_by_pub.get(publisher) or []
    if not candidates:
        candidates = [
            "Alex Grant","Taylor Miller","Jordan Bishop","Morgan Reeves","Riley Carter",
            "Sam Hayes","Casey Harper","Jamie Brooks","Avery Collins","Quinn Rowe"
        ]
    return random.choice(candidates)
|
|
|
|
def choose_character(publisher, chars_by_pub):
    """Pick a random character for *publisher*.

    Uses the publisher's own character list when it is non-empty; otherwise
    picks from a built-in list of placeholder names.
    """
    candidates = chars_by_pub.get(publisher) or []
    if not candidates:
        candidates = [
            "Sentinel","Nightglass","Starflare","Iron Warden","Moonstrike","Volt Runner","Red Quill"
        ]
    return random.choice(candidates)
|
|
|
|
def add_months(year: int, month: int, delta: int):
    """Return the ``(year, month)`` that lies *delta* months after the given one.

    *month* is 1-based (1 = January); the returned month is 1-based as well.
    """
    # Work on a single zero-based month counter, then split it back up.
    month_index = year * 12 + (month - 1) + delta
    result_year, zero_based_month = divmod(month_index, 12)
    return result_year, zero_based_month + 1
|
|
|
|
def sub_months(year: int, month: int, delta: int):
    """Return the ``(year, month)`` that lies *delta* months before the given one.

    *month* is 1-based (1 = January); the returned month is 1-based as well.
    """
    # Work on a single zero-based month counter, then split it back up.
    month_index = year * 12 + (month - 1) - delta
    result_year, zero_based_month = divmod(month_index, 12)
    return result_year, zero_based_month + 1
|
|
|
|
def rand_start_date_for_monthly(n_issues: int, year_min: int = 1960):
    """Pick a random ``(year, month)`` start for a monthly run of *n_issues*.

    The start is chosen so that the last issue, ``n_issues - 1`` months
    later, is dated no later than December of MAX_YEAR. The earliest
    possible start is January of *year_min*; if no start satisfies both
    bounds, January of *year_min* is returned.
    """
    months_after_first = max(0, n_issues - 1)
    last_year, last_month = sub_months(MAX_YEAR, 12, months_after_first)

    # Compare on a single zero-based month counter.
    lowest = year_min * 12  # Jan
    highest = last_year * 12 + (last_month - 1)
    if highest >= lowest:
        chosen = random.randint(lowest, highest)
        return chosen // 12, (chosen % 12) + 1

    # The run cannot fit before the cap: clamp to January of year_min.
    return year_min, 1
|
|
|
|
def rand_start_date_for_annuals(n_issues: int, year_min: int = 1960):
    """Pick a random ``(year, month)`` start for a yearly run of *n_issues*.

    The start year is chosen so that ``start_year + (n_issues - 1)`` does not
    exceed MAX_YEAR; when that bound falls below *year_min*, *year_min* is
    used. The month is drawn uniformly from 1 to 12.
    """
    years_after_first = max(0, n_issues - 1)
    latest_start_year = max(year_min, MAX_YEAR - years_after_first)
    year = random.randint(year_min, latest_start_year)
    month = random.randint(1, 12)
    return year, month
|
|
|
|
def zero_pad_page(n: int) -> str:
    """Return the archive member name for page *n*, e.g. ``P00001.jpg``.

    The page number is zero-padded to at least five digits.
    """
    return "P{:05d}.jpg".format(n)
|
|
|
|
def make_jpeg_bytes(text: str, width=1200, height=1800):
    """Render *text* centred on a white canvas and return it as JPEG bytes.

    *text* may contain newlines; each line is centred horizontally and the
    block of lines is centred vertically, with 20 pixels between lines.
    """
    line_gap = 20
    canvas = Image.new("RGB", (width, height), color="white")
    pen = ImageDraw.Draw(canvas)
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", size=64)
    except Exception:
        # Any failure to load the TrueType font falls back to Pillow's default.
        font = ImageFont.load_default()

    # Measure every line first so the whole block can be centred vertically.
    rows = text.split("\n")
    dims = []
    for row in rows:
        left, top, right, bottom = pen.textbbox((0,0), row, font=font)
        dims.append((right - left, bottom - top))

    block_height = sum(row_h for _, row_h in dims) + (len(rows) - 1) * line_gap
    cursor_y = (height - block_height) // 2
    for row, (row_w, row_h) in zip(rows, dims):
        cursor_x = (width - row_w) // 2
        pen.text((cursor_x, cursor_y), row, fill="black", font=font)
        cursor_y += row_h + line_gap

    out = io.BytesIO()
    canvas.save(out, format="JPEG", quality=90)
    return out.getvalue()
|
|
|
|
def escape_xml(s: str) -> str:
    """Escape the five XML special characters in *s*.

    Replaces ``&``, ``<``, ``>``, ``"`` and ``'`` with the predefined XML
    entities so the result can be placed in element content or attribute
    values.
    """
    # The ampersand must be replaced first; otherwise the ampersands
    # introduced by the other replacements would be escaped a second time.
    return (
        s.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace('"', "&quot;")
        .replace("'", "&apos;")
    )
|
|
|
|
def build_comicinfo_xml(series, number, title, volume_year, year, month,
                        publisher, writer, characters, fmt, page_count):
    """Return the ComicInfo.xml document for one issue as a string.

    *characters* may be a list/tuple of names or a single name; the names are
    joined with ``", "``. Text fields are passed through ``escape_xml``;
    *number*, *volume_year*, *year*, *month* and *page_count* are inserted
    as-is.
    """
    if isinstance(characters, (list, tuple)):
        names = characters
    else:
        names = [characters]
    chars_joined = ", ".join(names)

    elements = [
        '<?xml version="1.0" encoding="utf-8"?>',
        "<ComicInfo>",
        f"<Title>{escape_xml(title)}</Title>",
        f"<Series>{escape_xml(series)}</Series>",
        f"<Number>{number}</Number>",
        f"<Volume>{volume_year}</Volume>",
        f"<Year>{year}</Year>",
        f"<Month>{month}</Month>",
        f"<Publisher>{escape_xml(publisher)}</Publisher>",
        f"<Writer>{escape_xml(writer)}</Writer>",
        f"<Characters>{escape_xml(chars_joined)}</Characters>",
        f"<Format>{escape_xml(fmt)}</Format>",
        "<LanguageISO>en</LanguageISO>",
        f"<PageCount>{page_count}</PageCount>",
        "<Summary>Generated for application stress testing.</Summary>",
        "</ComicInfo>",
    ]
    # One element per line, with a newline after the closing tag.
    return "\n".join(elements) + "\n"
|
|
|
|
def make_filename(series, issue_no, month_name, year):
    """Return the archive file name ``<series> #NNN [<Month>, <Year>].cbz``.

    The issue number is zero-padded to at least three digits.
    """
    return "{} #{:03d} [{}, {}].cbz".format(series, issue_no, month_name, year)
|
|
|
|
def issues_for_format(fmt_norm: str) -> int:
    """Return a random issue count appropriate for the normalized format.

    Formats with a range get a uniform random count within it; "One-Shot"
    and any format without a range always yield exactly one issue.
    """
    count_range = {
        "Main Series": (1, 500),
        "Limited Series": (1, 15),
        "TPB": (1, 10),
        "Director's Cut": (1, 5),
        "Annual": (1, 5),
    }
    if fmt_norm not in count_range:
        return 1
    low, high = count_range[fmt_norm]
    return random.randint(low, high)
|
|
|
|
# ---------- continue existing volumes ----------
|
|
def series_target_dir(base_out: Path, publisher: str, character: str, fmt_display: str,
                      volume_year: int, series: str) -> Path:
    """Return the output directory for one series volume.

    The layout is ``base_out / publisher / character / format /
    "(volume_year) series"``, with every component below *base_out* passed
    through ``slugify``.
    """
    components = (publisher, character, fmt_display, f"({volume_year}) {series}")
    target = base_out
    for component in components:
        target = target / slugify(component)
    return target
|
|
|
|
def scan_existing_issue_info(target_dir: Path, series: str):
    """Inspect *target_dir* for already generated issues of *series*.

    Only files named ``<series> #NNN [<Month>, <Year>].cbz`` are considered.

    Returns:
        ``(existing_max_issue_no, first_issue_year, first_issue_month)``.
        The issue number is 0 when the directory does not exist or holds no
        matching file. Year and month come from issue #001 and are ``None``
        when no such issue with a recognised month name is found.
    """
    if not target_dir.exists():
        return 0, None, None

    name_re = re.compile(rf"^{re.escape(series)} #(\d{{3}}) \[([A-Za-z]+), (\d{{4}})\]$")
    highest = 0
    first_year = first_month = None
    for archive in target_dir.glob("*.cbz"):
        hit = name_re.match(archive.stem)
        if hit is None:
            continue
        number_text, month_text, year_text = hit.groups()
        number = int(number_text)
        issue_year = int(year_text)
        highest = max(highest, number)
        # Issue #1 carries the run's original start date.
        if number == 1 and month_text in MONTHS:
            first_month = MONTHS.index(month_text) + 1
            first_year = issue_year
    return highest, first_year, first_month
|
|
|
|
# ---------- core generation ----------
|
|
def generate_issue_cbz(base_out: Path, publisher: str, character: str, fmt_display: str,
                       series: str, issue_no: int, writer: str,
                       volume_year: int, issue_year: int, issue_month: int, page_count: int):
    """Write one issue as a CBZ archive and return its path.

    The archive holds *page_count* generated JPEG pages followed by a
    ``ComicInfo.xml`` entry. The first page shows the archive file name; the
    others show ``Page <n>``. The series directory is created if needed.

    Returns:
        The path of the written archive, or ``None`` (nothing is written)
        when *issue_year* is later than MAX_YEAR.
    """
    # Final safeguard for the year cap, independent of what callers checked.
    if issue_year > MAX_YEAR:
        return None

    month_name = MONTHS[issue_month - 1]
    title = f"{series} #{issue_no}"

    out_dir = series_target_dir(base_out, publisher, character, fmt_display, volume_year, series)
    out_dir.mkdir(parents=True, exist_ok=True)
    cbz_name = make_filename(series, issue_no, month_name, issue_year)
    cbz_path = out_dir / cbz_name

    comicinfo_xml = build_comicinfo_xml(
        series=series,
        number=issue_no,
        title=title,
        volume_year=volume_year,
        year=issue_year,
        month=issue_month,
        publisher=publisher,
        writer=writer,
        characters=[character],
        fmt=fmt_display,
        page_count=page_count,
    )

    with zipfile.ZipFile(cbz_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        for page_no in range(1, page_count + 1):
            caption = cbz_name if page_no == 1 else f"Page {page_no}"
            archive.writestr(zero_pad_page(page_no), make_jpeg_bytes(caption))
        archive.writestr("ComicInfo.xml", comicinfo_xml)
    return cbz_path
|
|
|
|
def estimate_total_issues(series_count: int):
    """Return a rough estimate of how many issues *series_count* series yield.

    A format and an issue count are drawn at random for each series, the same
    way generation does, and the counts are summed. The actual total can be
    lower, because the year cap and already existing issues reduce it.
    """
    return sum(
        issues_for_format(normalize_format(random.choice(FORMAT_OPTIONS)))
        for _ in range(series_count)
    )
|
|
|
|
def generate_one_series(base_out: Path, publishers, writers_by_pub, chars_by_pub, works,
                        counter, total_issues):
    """Generate all issues of one randomly configured series.

    Publisher, character, format, writer, title, issue count and start date
    are drawn at random. If the series directory already holds issues,
    numbering continues after the highest existing issue, and the date of an
    existing issue #1 replaces the random start date. Annual series advance
    one year per issue with a fixed month; all other formats advance one
    month per issue. Generation stops at the first issue dated after
    MAX_YEAR.

    *counter* is a one-element list whose first item is incremented for every
    issue written; progress is printed every 100 issues against
    *total_issues*.
    """
    publisher = random.choice(publishers)
    character = choose_character(publisher, chars_by_pub)
    fmt_display = random.choice(FORMAT_OPTIONS)
    fmt_norm = normalize_format(fmt_display)

    writer = choose_writer(publisher, writers_by_pub)
    series = rand_series_title(works)
    if random.random() < 0.35:
        series = f"{character}: {series}"

    n_issues = issues_for_format(fmt_norm)

    # Choose a start date that keeps the whole run within MAX_YEAR.
    is_annual = fmt_norm == "Annual"
    pick_start = rand_start_date_for_annuals if is_annual else rand_start_date_for_monthly
    start_year, start_month = pick_start(n_issues)
    volume_year = start_year  # volume = year of #1

    # Continue an existing volume: keep the original #1 date when one is found.
    target_dir = series_target_dir(base_out, publisher, character, fmt_display, volume_year, series)
    existing_max, first_y, first_m = scan_existing_issue_info(target_dir, series)
    if first_y and first_m:
        start_year, start_month = first_y, first_m
        volume_year = first_y

    start_issue = max(1, existing_max + 1)
    if start_issue > n_issues:
        return  # nothing left to create

    for issue_no in range(start_issue, n_issues + 1):
        offset = issue_no - 1
        if is_annual:
            # One issue per year, always in the start month.
            issue_year, issue_month = start_year + offset, start_month
        else:
            # One issue per month.
            issue_year, issue_month = add_months(start_year, start_month, offset)
        if issue_year > MAX_YEAR:
            break

        page_count = random.randint(5, 10)
        created = generate_issue_cbz(base_out, publisher, character, fmt_display, series,
                                     issue_no, writer, volume_year, issue_year, issue_month,
                                     page_count)
        if not created:
            continue
        counter[0] += 1
        if counter[0] % 100 == 0:
            print(f"Generated {counter[0]} issues out of {total_issues}")
|
|
|
|
def main():
    """Command-line entry point: parse arguments and generate the series.

    Loads the comic data JSON, creates the output directory, prints a rough
    estimate-based progress log while generating ``count`` series, and ends
    with a summary line.
    """
    parser = argparse.ArgumentParser(description="Generate CBZ files for stress testing.")
    parser.add_argument("count", type=int, help="Number of series to generate")
    parser.add_argument("--out", type=Path, default=Path("output_cbz"), help="Output base directory")
    parser.add_argument("--data", type=Path, default=Path("./comicdata.json"),
                        help="Path to comic data JSON (default: ./comicdata.json)")
    parser.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility")
    args = parser.parse_args()

    def reseed():
        # Seed the generator only when the user asked for reproducibility.
        if args.seed is not None:
            random.seed(args.seed)

    reseed()

    publishers, writers_by_pub, chars_by_pub, works = load_data(args.data)
    args.out.mkdir(parents=True, exist_ok=True)

    total_issues_est = estimate_total_issues(args.count)  # rough
    # The estimate consumed random numbers; seed again so generation starts
    # from the seed's initial state.
    reseed()

    counter = [0]
    for _ in range(args.count):
        generate_one_series(args.out, publishers, writers_by_pub, chars_by_pub, works,
                            counter, total_issues_est)

    print(f"Done. Generated {counter[0]} issues total (estimated {total_issues_est}).")
|
|
|
|
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
|