From cab1dc2dcb8036b5e5ef2fce41ed7ad4cfcc8227 Mon Sep 17 00:00:00 2001
From: Frederik Baerentsen
Date: Tue, 7 Jun 2022 21:07:28 +0200
Subject: [PATCH] Testing zip errors

---
 docker-compose.yml |  2 +-
 main.py            | 48 +++++++++++++++++------------
 opds/test.json     | 77 ----------------------------------------------
 requirements.txt   |  1 +
 test.json          | 10 ++++++
 5 files changed, 40 insertions(+), 98 deletions(-)
 delete mode 100644 opds/test.json

diff --git a/docker-compose.yml b/docker-compose.yml
index e2a7201..728c06f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -7,5 +7,5 @@ services:
     ports:
       - '5000:5000'
     volumes:
-      - '/opt/data/Comics/ComicRack:/library:ro'
+      - '/opt/data/Comics/ComicRack/Oni Press:/library:ro'
       - '${PWD}/:/app'
diff --git a/main.py b/main.py
index aff7a5e..a8ca9b1 100644
--- a/main.py
+++ b/main.py
@@ -6,10 +6,12 @@ import timeit
 import sqlite3
 import os
 import zipfile
+import gzip
 from bs4 import BeautifulSoup
 import re
 import datetime
 import sys
+import time
 from opds import fromdir
 import config
@@ -35,33 +37,39 @@ def healthz():
 
 @app.route('/import')
 def import2sql():
-    conn = sqlite3.connect('app.db')
+    conn = sqlite3.connect('/app/app.db')
     list = []
     start_time = timeit.default_timer()
     for root, dirs, files in os.walk(os.path.abspath(config.CONTENT_BASE_DIR)):
         for file in files:
             f = os.path.join(root, file)
-            s = zipfile.ZipFile(f)
-            Bs_data = BeautifulSoup(s.open('ComicInfo.xml').read(), "xml")
-            #print(Bs_data.select('Series')[0].text, file=sys.stderr)
-            #print(Bs_data.select('Title')[0].text, file=sys.stderr)
-            CVDB=re.findall('(?<=\[CVDB)(.*)(?=].)', Bs_data.select('Notes')[0].text)
-            #list.append('CVDB'+CVDB[0] + ': ' + Bs_data.select('Series')[0].text + "(" + Bs_data.select('Volume')[0].text + ") : " + Bs_data.select('Number')[0].text )
-            #print(list, file=sys.stdout)
+            #try:
+            print(f,file=sys.stdout)
+            try:
+                s = zipfile.ZipFile(f)
+                #s = gzip.GzipFile(f)
+                Bs_data = BeautifulSoup(s.open('ComicInfo.xml').read(), "xml")
+
+                #print(Bs_data.select('Series')[0].text, file=sys.stderr)
+                #print(Bs_data.select('Title')[0].text, file=sys.stderr)
+                CVDB=re.findall('(?<=\[CVDB)(.*)(?=].)', Bs_data.select('Notes')[0].text)
+                #list.append('CVDB'+CVDB[0] + ': ' + Bs_data.select('Series')[0].text + "(" + Bs_data.select('Volume')[0].text + ") : " + Bs_data.select('Number')[0].text )
+                #print(list, file=sys.stdout)
 
-            ISSUE=Bs_data.select('Number')[0].text
-            SERIES=Bs_data.select('Series')[0].text
-            VOLUME=Bs_data.select('Volume')[0].text
-            PUBLISHER=Bs_data.select('Publisher')[0].text
-            TITLE=Bs_data.select('Title')[0].text
-            PATH=f
-            UPDATED=str(datetime.datetime.now())
-            print(UPDATED,file=sys.stdout)
-            sql="INSERT OR REPLACE INTO COMICS (CVDB,ISSUE,SERIES,VOLUME, PUBLISHER, TITLE, FILE,PATH,UPDATED) VALUES ("+CVDB[0]+",'"+ISSUE+"','"+SERIES+"','"+VOLUME+"','"+PUBLISHER+"','"+TITLE+"','"+file+"','" + f + "','" + UPDATED + "')"
-            print(sql,file=sys.stdout)
-            conn.execute(sql);
-            conn.commit()
+                ISSUE=Bs_data.select('Number')[0].text
+                SERIES=Bs_data.select('Series')[0].text
+                VOLUME=Bs_data.select('Volume')[0].text
+                PUBLISHER=Bs_data.select('Publisher')[0].text
+                TITLE=Bs_data.select('Title')[0].text
+                PATH=f
+                UPDATED=str(datetime.datetime.now())
+                #print(UPDATED,file=sys.stdout)
+                sql="INSERT OR REPLACE INTO COMICS (CVDB,ISSUE,SERIES,VOLUME, PUBLISHER, TITLE, FILE,PATH,UPDATED) VALUES (?,?,?,?,?,?,?,?,?)"
+                #print(sql,file=sys.stdout)
+                conn.execute(sql, (CVDB[0], ISSUE, SERIES, VOLUME, PUBLISHER, TITLE, file, f, UPDATED))
+                conn.commit()
+            except Exception as e:
+                print(f, e, file=sys.stdout)
     conn.close()
 
     elapsed = timeit.default_timer() - start_time
diff --git a/opds/test.json b/opds/test.json
deleted file mode 100644
index 5c095f2..0000000
--- a/opds/test.json
+++ /dev/null
@@ -1,77 +0,0 @@
-[
-  {
-    "SQL TEST": [
-      {
-        "SQL": "(series like '%Aqua%' or series like '%girl%') and issue in ('1','2','5','10') and title not like '%Annual%'"
-      }
-    ]
-  },{
-    "Man 2020,2019": [
-      {
-        "title": "Man",
-        "volume": [
-          "2020",
-          "2019"
-        ],
-        "publisher": "",
-        "series": "",
-        "issue": ""
-      }
-    ]
-  },
-  {
-    "DC (BAT)": [
-      {
-        "title": "",
-        "volume": "",
-        "publisher": "DC Comics",
-        "series": "Bat",
-        "issue": ""
-      }
-    ]
-  },{
-    "Marvel": [
-      {
-        "title": "",
-        "volume": "",
-        "publisher": "marvel",
-        "series": "",
-        "issue": ""
-      }
-    ]
-  },
-  {
-    "Girl": [
-      {
-        "title": ["girl","man","World"],
-        "volume": "",
-        "publisher": "",
-        "series": "girl",
-        "issue": ""
-      }
-    ]
-  },
-  {
-    "Aquaman": [
-      {
-        "title": "",
-        "volume": "",
-        "publisher": "",
-        "series": "aquaman",
-        "issue": ["2","3","5","10","22"]
-      }
-    ]
-  }
-,
-  {
-    "Girl series": [
-      {
-        "title": "",
-        "volume": "",
-        "publisher": "",
-        "series": "girl",
-        "issue": "2"
-      }
-    ]
-  }
-]
diff --git a/requirements.txt b/requirements.txt
index 7118e39..cecb968 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ Flask-HTTPAuth==4.5.0
 gevent==21.8.0
 bs4
 lxml
+# NOTE: gzip is part of the Python standard library -- it must not be pip-installed
diff --git a/test.json b/test.json
index 5c095f2..df7dc2d 100644
--- a/test.json
+++ b/test.json
@@ -5,6 +5,16 @@
         "SQL": "(series like '%Aqua%' or series like '%girl%') and issue in ('1','2','5','10') and title not like '%Annual%'"
       }
     ]
+  },{
+    "Letter 44": [
+      {
+        "title": "",
+        "volume": "",
+        "publisher": "",
+        "series": "Letter 44",
+        "issue": ""
+      }
+    ]
   },{
     "Man 2020,2019": [
       {