ComicRack_Scripts/Update Missing/update_missing.py

229 lines
7.6 KiB
Python
Raw Permalink Normal View History

2022-07-15 09:21:05 +02:00
#!/usr/bin/env python3
# Python 3 only: the database is written through io.open(..., encoding="utf8")
# with native str values, which Python 2's io module rejects with a TypeError.
2022-07-08 10:29:37 +02:00
"""
Ver 2.0
Overview:
This script grabs the newest issues from ComicVine and 'appends' them to the
missing.mcl file contents.
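usage:
python update_missing.py [<in_file> <out_file> <api_key> <start_date> <end_date>]
All five arguments must be given together to take effect. Otherwise the
script uses the "*_latest*" file found in the "Update Missing" directory as
input, writes <today>_latest.mcl next to it, reads the API key from the
".apikey" file in that directory, and searches from the date stamp in the
input file name up to today.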
in_file: missing.mcl (i.e., your most recent version)
out_file: missing_<date> (or w/e you want to name it)
api_key: provided by ComicVine
start_date: the date the mcl file is synched with (YYYY-MM-DD)
end_date: today's date (YYYY-MM-DD)
e.g.,
python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17
Technical stuff:
The mcl file format contains a header followed by a list of volumes with
their respective issues/numbers.
Missing;<date_of_last_update>
<volume_id>;list of <issue_id>;list of <issue_num>
<volume_id>;list of <issue_id>;list of <issue_num>
...
<volume_id>;list of <issue_id>;list of <issue_num>
The lists are comma delimited. Commas followed immediately by a space are
not considered a delimiter. Some issues are numbered like "v. 1, no. 01".
If there is a space in the list of issue numbers, the entire list is
wrapped in double quotes.
Note: There is one volume (id: 77901) that has an issue number "1,5". This
can potentially wreak some havoc if not treated carefully.
"""
import requests
import sys
2022-07-15 09:21:05 +02:00
import os
2024-11-12 16:49:10 +01:00
import io
2022-07-15 09:21:05 +02:00
import re
from datetime import date,datetime
2022-07-15 10:43:24 +02:00
# Data directory: "<parent of this script's directory>/Update Missing/".
# It is expected to hold the "<YYYYMMDD>_latest.mcl" database, the ".apikey"
# file and the "log/" sub-directory.
ROOT_DIR = os.path.join(os.path.realpath(os.path.join(os.path.dirname(__file__), '..')), 'Update Missing/')

today_date = date.today().strftime("%Y%m%d")

if len(sys.argv) > 5:
    # Explicit arguments win, and no longer require a "*_latest" database or
    # an ".apikey" file to exist in ROOT_DIR.
    print("Using argvs")
    in_file = str(sys.argv[1])     # missing.mcl
    out_file = str(sys.argv[2])    # updated_missing.mcl
    api_key = str(sys.argv[3])     # ComicVine API key
    start_date = str(sys.argv[4])  # start date range to search for new issues
    end_date = str(sys.argv[5])    # end date range to search for new issues
else:
    # Collect every "*_latest*" entry that carries an 8-digit date stamp and
    # use the newest one, instead of whichever entry os.listdir() happens to
    # return last.
    candidates = []
    for entry in os.listdir(ROOT_DIR):
        stamp = re.search('[0-9]{8}', entry)
        if "_latest" in entry and stamp is not None:
            candidates.append((stamp.group(), entry))
    if not candidates:
        sys.exit("py: No '<YYYYMMDD>_latest' database found in " + ROOT_DIR)
    old_date, latest_name = max(candidates)

    in_file = ROOT_DIR + latest_name
    out_file = ROOT_DIR + today_date + "_latest.mcl"

    # The key is the first line of the file; strip() also removes a stray
    # "\r" or surrounding blanks that would otherwise end up in the URL.
    with open(os.path.join(ROOT_DIR, ".apikey"), "r") as f:
        api_key = f.readline().strip()
    if not api_key:
        sys.exit("py: No API key found in " + os.path.join(ROOT_DIR, ".apikey"))

    # The database is in sync up to the date stamped in its file name.
    start_date = datetime.strptime(old_date, "%Y%m%d").strftime("%Y-%m-%d")
    end_date = date.today().strftime("%Y-%m-%d")

# Touch today's log file. Nothing in this script writes to it, so the handle
# is closed right away instead of staying open for the whole run.
with open(ROOT_DIR + 'log/' + today_date + '.log', 'a'):
    pass
2022-07-08 10:29:37 +02:00
def _unquote(field):
    """Return *field* without one pair of wrapping double quotes, if present."""
    if len(field) >= 2 and field[0] == '"' and field[-1] == '"':
        return field[1:-1]
    return field


issues_number = {}  # issue_id -> issue number string
issues_volume = {}  # issue_id -> volume_id
cont = 0            # how many times an already-seen issue id shows up again

print("py: Reading in current database")

# Read as UTF-8 because that is the encoding the database is written with.
with io.open(in_file, "r", encoding="utf8") as comiclist:
    next(comiclist, None)  # skip the "Missing;<date>" header line
    for line in comiclist:
        stripped = line.rstrip("\r\n")
        if not stripped:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        line_split = stripped.split(";")
        volume_id = int(line_split[0])
        # The file format allows a list to be wrapped in double quotes. The
        # previous check indexed one past the end of the field and could only
        # raise IndexError, so the quotes are simply removed here.
        # NOTE(review): commas followed by a space inside an issue number are
        # still treated as delimiters, exactly as before.
        issue_split = _unquote(line_split[1]).split(",")
        num_split = _unquote(line_split[2]).split(",")
        for position, raw_issue_id in enumerate(issue_split):
            issue_key = int(raw_issue_id)
            if issue_key in issues_number:
                cont += 1
            issues_number[issue_key] = num_split[position]
            issues_volume[issue_key] = volume_id
2022-10-05 16:31:47 +02:00
print("py: Querying ComicVine for new issues")
headers = {'User-Agent': 'Update Missing/ (https://gitea.baerentsen.space/FrederikBaerentsen/ComicRack_Scripts/src/branch/master/Update Missing)'}

# Seconds to wait for ComicVine before a request counts as failed; without a
# timeout a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 60

new_comics_cont = 0      # issues not present in the database before
old_comics_cont = 0      # issues already present in the database
updated_comics_cont = 0  # already-present issues whose number/volume changed
deleted_comics_cont = 0  # filled in later, when Deleted_Comics.txt is written
comic_skip_cont = 0      # result positions skipped because of request errors
offset = 0
total_results = 100      # placeholder until the first response reports the real total
limit = 100
skip = 0
retry = 0
ErrorIds = ""
issue_id = None          # id of the last issue processed; None until a page succeeds
# Every known issue starts as "not retrieved"; ids are removed as they come back.
non_retrieved_comics = issues_number.copy()
FindingError = False

# Paging strategy: fetch `limit` results per request. A failing request is
# retried 4 times; after that the page is shrunk one result at a time until it
# succeeds, and the result right after the successful page is skipped.
while offset < total_results:
    try:
        request_url = "https://comicvine.gamespot.com/api/issues/?api_key=" + api_key +"&limit=" + str(limit) + "&format=json&offset=" + str(offset) + "&field_list=id,issue_number,volume&filter=date_last_updated:" + start_date + "|" + end_date + "&sort=id"
        r = requests.get(request_url, headers=headers, timeout=REQUEST_TIMEOUT)
        json_obj = r.json()
        total_results = json_obj['number_of_total_results']
        print("py:" + str(min(offset, total_results)) + "/" + str(total_results) + " Since " + start_date)

        # Decode the whole page before touching any state, so a malformed
        # entry cannot leave the page half applied when the request is retried.
        page = []
        for entry in json_obj['results']:
            # "," and ";" are the mcl delimiters, so they are encoded inside
            # issue numbers; line breaks are dropped.
            num = entry['issue_number'].replace(",",".&@1").replace(";",".&@2").replace("\n","").replace("\r","")
            page.append((entry['id'], entry['volume']['id'], num))

        for issue_id, volume_id, num in page:
            if issue_id not in issues_number:
                new_comics_cont += 1
                issues_number[issue_id] = num
                issues_volume[issue_id] = volume_id
            else:
                # pop() rather than del: the id may already have been removed
                # (repeated id across pages) or may never have been in the
                # snapshot (issue added earlier in this same run).
                non_retrieved_comics.pop(issue_id, None)
                old_comics_cont += 1
                if issues_number[issue_id] != num or issues_volume[issue_id] != volume_id:
                    updated_comics_cont += 1
                    issues_number[issue_id] = num
                    issues_volume[issue_id] = volume_id

        offset += limit + skip
        FindingError = False
        if skip == 1:
            # The skipped result is reported as "last id seen + 1".
            error_id = "unknown" if issue_id is None else str(issue_id+1)
            print("py: Comic with error found, id= " + error_id)
            ErrorIds += ";"+ error_id
            comic_skip_cont += 1
            print("py: Continue loading comics now...")
            FindingError = True
        # NOTE(review): assumed to run after every successful page, not only
        # after a skip - confirm against the original indentation.
        skip = 0
        limit = 100
        retry = 0
    except Exception:
        # Exception (not a bare except) so Ctrl-C and sys.exit() still stop
        # the script instead of being counted as a failed request.
        if retry < 4 and not FindingError:
            print("py: Error. Trying Again...")
            retry += 1
        else:
            if not FindingError:
                print("py: Finding Error in comic list: " + str(100-limit) + "%")
            skip = 1
            limit -= 1
            if limit == 0 or FindingError:
                if issue_id is None:
                    # Not a single issue was ever retrieved: give up cleanly
                    # before the database is rewritten (this path used to end
                    # in a NameError on issue_id).
                    sys.exit("py: ComicVine could not be queried; aborting before writing the database.")
                # NOTE(review): the printed id uses the offset before it is
                # incremented, the recorded id uses it after - kept as is.
                print("py: Comic with error found, id= " + str(issue_id+offset))
                FindingError = True
                limit = 1
                offset += 1
                comic_skip_cont += 1
                ErrorIds += ";"+ str(issue_id+offset)
# Regroup the flat per-issue maps into {volume_id: {issue_id: issue_number}}.
comics = {}
for issue_id, issue_num in issues_number.items():
    volume_issues = comics.setdefault(issues_volume[issue_id], {})
    volume_issues[issue_id] = issue_num
2022-10-05 16:31:47 +02:00
print("py: Writing missings to file")

# List every issue id that was in the database but did not come back from
# ComicVine in this run, one id per line. The with-block closes the file even
# if a write fails.
with open(ROOT_DIR+"Deleted_Comics.txt", "wb") as deleted_file:
    deleted_file.writelines(
        (str(issue_id)+"\n").encode() for issue_id in non_retrieved_comics
    )
deleted_comics_cont += len(non_retrieved_comics)
2022-10-05 16:31:47 +02:00
print("py: Writing database to file")

# Write the mcl file: a "Missing;<end_date>" header followed by one
# "<volume_id>;<issue ids>;<issue numbers>" line per volume, with volumes and
# issues in ascending id order. The with-block flushes and closes the file
# even if a write fails part-way.
with io.open(out_file, "w", encoding="utf8") as outfile:
    outfile.write("Missing;" + end_date + "\n")
    for volume_id in sorted(comics):
        volume_issues = comics[volume_id]
        ordered_ids = sorted(volume_issues)
        issues = ",".join(str(issue_id) for issue_id in ordered_ids)
        # NOTE(review): unlike the id list, the number list keeps a trailing
        # comma. That matches what this script has always written, so it is
        # preserved - confirm whether consumers of the file rely on it.
        nums = "".join(volume_issues[issue_id] + "," for issue_id in ordered_ids)
        outfile.write(str(volume_id) + ";" + issues + ";" + nums + "\n")
2022-10-05 16:31:47 +02:00
# Final run summary: added/skipped/known counts, ids not retrieved, updated
# entries, the ids recorded for failed requests, and the number of repeated
# issue ids met while reading the input file.
summary_lines = [
    "py: Done! " + str(new_comics_cont) + " comics added to database! (" + str(comic_skip_cont)+ " skipped and " + str(old_comics_cont) + " comics already in database)",
    "py: " + str(deleted_comics_cont) + " comics in databased not retrieved in this round.",
    "py: " + str(updated_comics_cont) + " comics updated in database.",
    "py: Ids with error in server: " + ErrorIds[1:],  # drop the leading ";"
    "py: " + str(cont),
]
for summary_line in summary_lines:
    print(summary_line)
2022-07-15 09:21:05 +02:00
#raw_input("Press Enter to continue...")