#!/usr/bin/env python3
"""Update a ComicRack ``missing.mcl`` database with recent ComicVine issues.

Ver 2.0

Overview:
    This script grabs the issues ComicVine reports as updated in a date range
    and merges them into the contents of the missing.mcl file.

Usage:
    python update_missing.py
    python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>

    in_file:    missing.mcl (i.e., your most recent version)
    out_file:   missing_<date>.mcl (or w/e you want to name it)
    api_key:    provided by ComicVine
    start_date: the date the mcl file is synched with (YYYY-MM-DD)
    end_date:   today's date (YYYY-MM-DD)

    e.g., python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17

    All five arguments must be given together; otherwise they are ignored and
    the defaults are used: the input is the file in ROOT_DIR whose name
    contains "_latest" (the 8-digit date in its name becomes start_date), the
    output is ROOT_DIR/<YYYYMMDD>_latest.mcl for today's date, and the API key
    is the first line of ROOT_DIR/.apikey.

Technical stuff:
    The mcl file format contains a header followed by a list of volumes with
    their respective issues/numbers.

        Missing;<date>
        <volume id>;list of <issue id>;list of <issue number>
        <volume id>;list of <issue id>;list of <issue number>
        ...

    The lists are comma delimited. Commas followed immediately by a space are
    not considered a delimiter. Some issues are numbered like "v. 1, no. 01".
    If there is a space in the list of issue numbers, the entire list is
    wrapped in double quotes.

    Issue numbers received from ComicVine are stored with "," rewritten as
    ".&@1" and ";" rewritten as ".&@2" so they cannot collide with the
    delimiters. The list of issue numbers is written with a trailing comma.

    Note: There is one volume (id: 77901) that has an issue number "1,5".
    This can potentially wreak some havoc if not treated carefully.
"""

import io
import os
import re
import sys
from datetime import date, datetime

try:
    import requests
except ImportError:  # reported at the start of main() so helpers stay importable
    requests = None

ROOT_DIR = os.path.join(os.path.realpath(os.path.join(os.path.dirname(__file__), '..')), 'Update Missing/')

HEADERS = {'User-Agent': 'Update Missing/ (https://gitea.baerentsen.space/FrederikBaerentsen/ComicRack_Scripts/src/branch/master/Update Missing)'}

# Seconds to wait for ComicVine before treating the request as failed, so a
# stalled connection enters the retry logic instead of hanging forever.
REQUEST_TIMEOUT = 60

# A comma only separates list items when it is not followed by a space.
LIST_DELIMITER = re.compile(r",(?! )")

DATE_IN_NAME = re.compile('[0-9]{8}')


def unquote(field):
    """Return *field* without one pair of wrapping double quotes, if present."""
    if len(field) >= 2 and field[0] == '"' and field[-1] == '"':
        return field[1:-1]
    return field


def escape_issue_number(raw):
    """Make a ComicVine issue number safe to store in an mcl list.

    "," becomes ".&@1", ";" becomes ".&@2" and line breaks are dropped.
    """
    return raw.replace(",", ".&@1").replace(";", ".&@2").replace("\n", "").replace("\r", "")


def parse_mcl_line(line):
    """Parse one ``volume;issue ids;issue numbers`` row.

    Returns ``(volume_id, [(issue_id, issue_number), ...])``. Surplus issue
    numbers (e.g. the empty item after a trailing comma) are ignored.

    Raises ValueError when a field is missing, an id is not an integer, or
    there are fewer issue numbers than issue ids.
    """
    fields = line.rstrip("\r\n").split(";")
    if len(fields) < 3:
        raise ValueError("expected 'volume;issues;numbers', got %r" % (line,))
    volume_id = int(fields[0])
    issue_ids = [int(item) for item in unquote(fields[1]).split(",")]
    numbers = LIST_DELIMITER.split(unquote(fields[2]))
    if len(numbers) < len(issue_ids):
        raise ValueError("%d issue ids but only %d issue numbers"
                         % (len(issue_ids), len(numbers)))
    return volume_id, list(zip(issue_ids, numbers))


def format_mcl_row(volume_id, issues):
    """Format one volume (``{issue_id: issue_number}``) as an mcl row.

    Issues are ordered by id. The numbers list keeps its trailing comma
    because that is what this script has always written.
    NOTE(review): downstream readers may rely on that comma - confirm before
    changing it.
    """
    ordered = sorted(issues)
    ids = ",".join(str(issue_id) for issue_id in ordered)
    numbers = "".join(issues[issue_id] + "," for issue_id in ordered)
    return str(volume_id) + ";" + ids + ";" + numbers + "\n"


def default_settings():
    """Derive (in_file, out_file, api_key, start_date, end_date) from ROOT_DIR."""
    dated = []
    for name in os.listdir(ROOT_DIR):
        found = DATE_IN_NAME.search(name) if "_latest" in name else None
        if found:
            dated.append((found.group(), name))
    if not dated:
        sys.exit("py: No dated '_latest' file found in " + ROOT_DIR)
    # With several candidates, take the most recent one rather than whatever
    # os.listdir happens to return last.
    old_stamp, latest_name = max(dated)

    with open(os.path.join(ROOT_DIR, ".apikey"), "r") as key_file:
        api_key = key_file.readline().strip()
    if not api_key:
        sys.exit("py: " + os.path.join(ROOT_DIR, ".apikey") + " is empty")

    today = date.today()  # read once so every derived value agrees
    stamp = today.strftime("%Y%m%d")

    # The script has always created today's log file without writing to it;
    # keep creating it, but release the handle right away.
    with open(ROOT_DIR + 'log/' + stamp + '.log', 'a'):
        pass

    in_file = ROOT_DIR + latest_name
    out_file = ROOT_DIR + stamp + "_latest.mcl"
    start_date = datetime.strptime(old_stamp, "%Y%m%d").strftime("%Y-%m-%d")
    end_date = today.strftime("%Y-%m-%d")
    return in_file, out_file, api_key, start_date, end_date


def load_database(path):
    """Read an mcl file.

    Returns ``(issues_number, issues_volume, duplicates)``: issue id -> issue
    number, issue id -> volume id, and how many times an issue id was seen
    again after its first occurrence (the last occurrence wins).
    """
    issues_number = {}
    issues_volume = {}
    duplicates = 0
    # Read as UTF-8 because that is how the database is written.
    with io.open(path, "r", encoding="utf8") as comiclist:
        next(comiclist, None)  # first line is the "Missing;<date>" header
        for line_no, line in enumerate(comiclist, start=2):
            if not line.strip():
                continue
            try:
                volume_id, issues = parse_mcl_line(line)
            except ValueError as err:
                raise ValueError("%s, line %d: %s" % (path, line_no, err))
            for issue_id, number in issues:
                if issue_id in issues_number:
                    duplicates += 1
                issues_number[issue_id] = number
                issues_volume[issue_id] = volume_id
    return issues_number, issues_volume, duplicates


def guess_id(last_issue_id, delta):
    """Return ``last_issue_id + delta`` as text, or "unknown" with no record yet."""
    if last_issue_id is None:
        return "unknown"
    return str(last_issue_id + delta)


def fetch_updates(api_key, start_date, end_date, issues_number, issues_volume):
    """Merge issues updated between the two dates into the given dicts.

    ``issues_number`` and ``issues_volume`` are modified in place.

    Returns ``(counts, error_ids, non_retrieved)``: a dict with the keys
    "new", "old", "updated" and "skipped"; the list of ids reported as
    failing on the server; and the subset of the original database that
    ComicVine did not return.
    """
    counts = {"new": 0, "old": 0, "updated": 0, "skipped": 0}
    error_ids = []
    non_retrieved = issues_number.copy()

    offset = 0
    total = 100  # placeholder until the first response reports the real total
    limit = 100
    skip = 0
    retry = 0
    finding_error = False
    issue_id = None  # id of the record most recently read from a response

    while offset < total:
        try:
            request_url = ("https://comicvine.gamespot.com/api/issues/?api_key=" + api_key
                           + "&limit=" + str(limit)
                           + "&format=json&offset=" + str(offset)
                           + "&field_list=id,issue_number,volume&filter=date_last_updated:"
                           + start_date + "|" + end_date + "&sort=id")
            response = requests.get(request_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            payload = response.json()
            total = payload['number_of_total_results']
            print("py:" + str(min(offset, total)) + "/" + str(total) + " Since " + start_date)

            for record in payload['results']:
                volume_id = record['volume']['id']
                issue_id = record['id']
                number = escape_issue_number(record['issue_number'])
                if issue_id not in issues_number:
                    counts["new"] += 1
                    issues_number[issue_id] = number
                    issues_volume[issue_id] = volume_id
                else:
                    # pop(), not del: a page that failed half-way is fetched
                    # again, and its records must not raise the second time.
                    non_retrieved.pop(issue_id, None)
                    counts["old"] += 1
                    if issues_number[issue_id] != number or issues_volume[issue_id] != volume_id:
                        counts["updated"] += 1
                        issues_number[issue_id] = number
                        issues_volume[issue_id] = volume_id

            # skip == 1 means the page was shrunk until it loaded, so the
            # record right after it is taken to be the broken one.
            offset += limit + skip
            finding_error = False
            if skip == 1:
                suspect = guess_id(issue_id, 1)
                print("py: Comic with error found, id= " + suspect)
                error_ids.append(suspect)
                counts["skipped"] += 1
                print("py: Continue loading comics now...")
                finding_error = True
            skip = 0
            limit = 100
            retry = 0
        except Exception:
            # Exception rather than a bare except, so Ctrl-C still stops the run.
            # NOTE(review): the source reached review with its indentation
            # lost; the nesting below is the most literal reading of the
            # statement order - confirm against the author's copy.
            if retry < 4 and not finding_error:
                print("py: Error. Trying Again...")
                retry += 1
            else:
                if not finding_error:
                    print("py: Finding Error in comic list: " + str(100 - limit) + "%")
                skip = 1
                limit -= 1
                if limit == 0 or finding_error:
                    # The printed id uses the offset before it is advanced,
                    # the recorded id the offset after, as before.
                    print("py: Comic with error found, id= " + guess_id(issue_id, offset))
                    finding_error = True
                    limit = 1
                    offset += 1
                    counts["skipped"] += 1
                    error_ids.append(guess_id(issue_id, offset))

    return counts, error_ids, non_retrieved


def main(argv=None):
    """Run the update; *argv* defaults to ``sys.argv``."""
    argv = sys.argv if argv is None else argv
    if requests is None:
        sys.exit("py: The 'requests' package is required to query ComicVine")

    if len(argv) > 5:
        print("Using argvs")
        in_file = str(argv[1])     # missing.mcl
        out_file = str(argv[2])    # updated_missing.mcl
        api_key = str(argv[3])     # ComicVine API key
        start_date = str(argv[4])  # start date range to search for new issues
        end_date = str(argv[5])    # end date range to search for new issues
    else:
        in_file, out_file, api_key, start_date, end_date = default_settings()

    print("py: Reading in current database")
    issues_number, issues_volume, duplicates = load_database(in_file)

    print("py: Querying ComicVine for new issues")
    counts, error_ids, non_retrieved = fetch_updates(
        api_key, start_date, end_date, issues_number, issues_volume)

    # Regroup the flat issue tables by volume for writing.
    comics = {}
    for issue_id, number in issues_number.items():
        comics.setdefault(issues_volume[issue_id], {})[issue_id] = number

    print("py: Writing missings to file")
    with open(ROOT_DIR + "Deleted_Comics.txt", "wb") as deleted_file:
        for issue_id in non_retrieved:
            deleted_file.write((str(issue_id) + "\n").encode())
    deleted_count = len(non_retrieved)

    print("py: Writing database to file")
    with io.open(out_file, "w", encoding="utf8") as outfile:
        outfile.write("Missing;" + end_date + "\n")
        for volume_id in sorted(comics):
            outfile.write(format_mcl_row(volume_id, comics[volume_id]))

    print("py: Done! " + str(counts["new"]) + " comics added to database! ("
          + str(counts["skipped"]) + " skipped and " + str(counts["old"])
          + " comics already in database)")
    print("py: " + str(deleted_count) + " comics in databased not retrieved in this round.")
    print("py: " + str(counts["updated"]) + " comics updated in database.")
    print("py: Ids with error in server: " + ";".join(error_ids))
    print("py: " + str(duplicates))


if __name__ == "__main__":
    main()