#!/usr/bin/python2
"""
Ver 2.0

Overview:
    This script grabs the newest issues from ComicVine and 'appends' them to
    the missing.mcl file contents.

usage:
    python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>

    in_file:    missing.mcl (i.e., your most recent version)
    out_file:   missing_<date>.mcl (or whatever you want to name it)
    api_key:    provided by ComicVine
    start_date: the date the mcl file is synched with (YYYY-MM-DD)
    end_date:   today's date (YYYY-MM-DD)

    e.g.,
    python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17

    When fewer than five arguments are given, everything is derived from
    ROOT_DIR instead: the input is the '*_latest*' file carrying the newest
    YYYYMMDD stamp (that stamp is also the start date), the output is
    '<today>_latest.mcl', the API key is the first line of '.apikey' and the
    end date is today.

Technical stuff:
    The mcl file format contains a header followed by a list of volumes with
    their respective issues/numbers.

        Missing;<date>
        <volume id>;list of <issue ids>;list of <issue numbers>
        <volume id>;list of <issue ids>;list of <issue numbers>
        ...
        <volume id>;list of <issue ids>;list of <issue numbers>

    The lists are comma delimited. Commas followed immediately by a space are
    not considered a delimiter. Some issues are numbered like
    "v. 1, no. 01". If there is a space in the list of issue numbers, the
    entire list is wrapped in double quotes.

    Note: There is one volume (id: 77901) that has an issue number "1,5".
    This can potentially wreak some havoc if not treated carefully.

    Files written by this script side-step the problem: ',' and ';' inside an
    issue number are stored as '.&@1' and '.&@2', so a plain split on the
    delimiters is enough to read them back.
"""
import io
import os
import re
import sys
from datetime import date, datetime

ROOT_DIR = os.path.join(
    os.path.realpath(os.path.join(os.path.dirname(__file__), '..')),
    'Update Missing/')

HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/50.0.2661.102 Safari/537.36'}

PAGE_SIZE = 100    # issues requested per page
MAX_RETRIES = 4    # plain retries before hunting for a single bad record


def strip_wrapping_quotes(field):
    """Return *field* without its surrounding double quotes, if it has both."""
    if len(field) >= 2 and field[0] == '"' and field[-1] == '"':
        return field[1:-1]
    return field


def escape_issue_number(raw):
    """Make an issue number safe to store in the ','/';' delimited file."""
    return (raw.replace(",", ".&@1").replace(";", ".&@2")
            .replace("\n", "").replace("\r", ""))


def parse_database(lines):
    """Parse the lines of an mcl file.

    The first line is the header and is ignored; blank lines are skipped.

    Returns a tuple ``(issues_number, issues_volume, duplicates)`` where the
    two dicts map issue id -> issue number and issue id -> volume id, and
    ``duplicates`` counts issue ids that appeared more than once (the last
    occurrence wins).
    """
    issues_number = {}
    issues_volume = {}
    duplicates = 0
    rows = iter(lines)
    next(rows, None)  # header line
    for line in rows:
        line = line.replace("\n", "")
        if not line.strip():
            continue
        fields = line.split(";")
        volume_id = int(fields[0])
        issue_ids = strip_wrapping_quotes(fields[1]).split(",")
        numbers = strip_wrapping_quotes(fields[2]).split(",")
        for index, raw_id in enumerate(issue_ids):
            issue_id = int(raw_id)
            if issue_id in issues_number:
                duplicates += 1
            # Indexed on purpose: a row with fewer numbers than ids is
            # corrupt and should fail loudly rather than be truncated.
            issues_number[issue_id] = numbers[index]
            issues_volume[issue_id] = volume_id
    return issues_number, issues_volume, duplicates


def new_stats():
    """Return a zeroed counter dict for one update run."""
    return {'new': 0, 'old': 0, 'updated': 0, 'skipped': 0,
            'last_issue_id': 0}


def merge_results(results, issues_number, issues_volume, non_retrieved, stats):
    """Fold one page of ComicVine results into the in-memory database.

    ``issues_number``, ``issues_volume``, ``non_retrieved`` and ``stats`` are
    updated in place, item by item, so that whatever was processed before an
    exception is kept. Ids seen on the server are removed from
    ``non_retrieved``.
    """
    for entry in results:
        volume_id = entry['volume']['id']
        issue_id = entry['id']
        stats['last_issue_id'] = issue_id
        number = escape_issue_number(entry['issue_number'])
        if issue_id not in issues_number:
            stats['new'] += 1
        else:
            # pop() rather than del: when a page is replayed after a partial
            # failure its first items were already handled, and a KeyError
            # here would make every replay fail on the very first item.
            non_retrieved.pop(issue_id, None)
            stats['old'] += 1
            if (issues_number[issue_id] != number
                    or issues_volume[issue_id] != volume_id):
                stats['updated'] += 1
        issues_number[issue_id] = number
        issues_volume[issue_id] = volume_id


def build_request_url(api_key, limit, offset, start_date, end_date):
    """Build the ComicVine 'issues' query for one page of updated issues."""
    return ("https://comicvine.gamespot.com/api/issues/?api_key=" + api_key
            + "&limit=" + str(limit)
            + "&format=json&offset=" + str(offset)
            + "&field_list=id,issue_number,volume"
            + "&filter=date_last_updated:" + start_date + "|" + end_date
            + "&sort=id")


def fetch_updates(api_key, start_date, end_date, issues_number, issues_volume):
    """Download every issue updated in the date range and merge it in.

    A failing page is retried MAX_RETRIES times. If it keeps failing, the
    page size is reduced one step at a time until the request succeeds; the
    record right after the successful part is then skipped and reported.
    While in that mode, consecutive failures skip one record at a time.

    Returns ``(stats, non_retrieved, error_ids)``: the counters, the
    database entries the server did not return, and the reported ids (as
    strings) of the skipped records.
    """
    # Imported here so the parsing/formatting helpers above stay importable
    # on machines without the third-party 'requests' package.
    import requests

    stats = new_stats()
    non_retrieved = issues_number.copy()
    error_ids = []
    offset = 0
    total = 100  # placeholder until the first response reports the real total
    limit = PAGE_SIZE
    skip = 0
    retry = 0
    finding_error = False

    while offset < total:
        try:
            request_url = build_request_url(api_key, limit, offset,
                                            start_date, end_date)
            response = requests.get(request_url, headers=HEADERS)
            payload = response.json()
            total = payload['number_of_total_results']
            print("py:" + str(min(offset, total)) + "/" + str(total)
                  + " Since " + start_date)
            merge_results(payload['results'], issues_number, issues_volume,
                          non_retrieved, stats)
            offset += limit + skip
            finding_error = False
            if skip == 1:
                # Results are sorted by id, so the record that broke the
                # page is reported as "last good id + 1" (a best guess).
                bad_id = stats['last_issue_id'] + 1
                print("py: Comic with error found, id= " + str(bad_id))
                error_ids.append(str(bad_id))
                stats['skipped'] += 1
                print("py: Continue loading comics now...")
                finding_error = True
            skip = 0
            limit = PAGE_SIZE
            retry = 0
        except Exception:  # not a bare except: Ctrl-C must still abort
            if retry < MAX_RETRIES and not finding_error:
                print("py: Error. Trying Again...")
                retry += 1
            else:
                if not finding_error:
                    print("py: Finding Error in comic list: "
                          + str(100 - limit) + "%")
                    skip = 1
                    limit -= 1
                if limit == 0 or finding_error:
                    # NOTE(review): the printed id uses the offset before the
                    # increment and the recorded id the offset after it;
                    # kept exactly as the script has always reported them.
                    print("py: Comic with error found, id= "
                          + str(stats['last_issue_id'] + offset))
                    finding_error = True
                    limit = 1
                    offset += 1
                    stats['skipped'] += 1
                    error_ids.append(str(stats['last_issue_id'] + offset))
    return stats, non_retrieved, error_ids


def format_database(issues_number, issues_volume, end_date):
    """Return the mcl file content as a list of text lines.

    Volumes and the issue ids inside a volume are written in ascending
    order. The numbers field keeps a ',' after every number, including the
    last one, which is the layout this script has always produced.
    """
    volumes = {}
    for issue_id, number in issues_number.items():
        volumes.setdefault(issues_volume[issue_id], {})[issue_id] = number

    lines = ["Missing;" + end_date + "\n"]
    for volume_id in sorted(volumes):
        issue_ids = sorted(volumes[volume_id])
        ids_field = ",".join(str(issue_id) for issue_id in issue_ids)
        numbers_field = "".join(volumes[volume_id][issue_id] + ","
                                for issue_id in issue_ids)
        lines.append(str(volume_id) + ";" + ids_field + ";"
                     + numbers_field + "\n")
    return lines


def find_latest_database(root_dir):
    """Return ``(stamp, name)`` of the newest '*_latest*' file in *root_dir*.

    ``stamp`` is the first run of eight digits in the file name (YYYYMMDD).
    Raises IOError when no such file exists.
    """
    candidates = []
    for name in os.listdir(root_dir):
        if "_latest" not in name:
            continue
        stamp = re.search('[0-9]{8}', name)
        if stamp:
            candidates.append((stamp.group(), name))
    if not candidates:
        raise IOError("no '*_latest*' file with a YYYYMMDD date in "
                      + root_dir)
    return max(candidates)


def resolve_settings(argv, today):
    """Return ``(in_file, out_file, api_key, start_date, end_date)``.

    Five command-line arguments override everything; otherwise the values
    are derived from the contents of ROOT_DIR (see the module docstring).
    """
    if len(argv) > 5:
        print("Using argvs")
        return tuple(str(arg) for arg in argv[1:6])

    stamp, name = find_latest_database(ROOT_DIR)
    with open(ROOT_DIR + "/.apikey", "r") as key_file:
        api_key = key_file.readline().strip()
    if not api_key:
        raise ValueError("the .apikey file in " + ROOT_DIR + " is empty")

    in_file = ROOT_DIR + name
    out_file = ROOT_DIR + today.strftime("%Y%m%d") + "_latest.mcl"
    start_date = datetime.strptime(stamp, "%Y%m%d").strftime("%Y-%m-%d")
    end_date = today.strftime("%Y-%m-%d")
    return in_file, out_file, api_key, start_date, end_date


def touch_log(today):
    """Make sure today's (empty) log file exists under ROOT_DIR/log/.

    The script never writes to it; only the creation of the file is kept.
    A missing log directory is skipped so that runs driven purely by
    command-line arguments do not depend on ROOT_DIR.
    """
    log_dir = ROOT_DIR + 'log/'
    if os.path.isdir(log_dir):
        with open(log_dir + today.strftime("%Y%m%d") + '.log', 'a'):
            pass


def main(argv=None):
    """Read the database, merge the ComicVine updates and write the result."""
    argv = sys.argv if argv is None else argv
    today = date.today()
    touch_log(today)
    in_file, out_file, api_key, start_date, end_date = resolve_settings(
        argv, today)

    print("py: Reading in current database")
    # The output is always written as UTF-8, so read it back the same way
    # instead of relying on the platform's default encoding.
    with io.open(in_file, "r", encoding="utf-8") as comiclist:
        issues_number, issues_volume, duplicates = parse_database(comiclist)

    print("py: Querying ComicVine for new issues")
    stats, non_retrieved, error_ids = fetch_updates(
        api_key, start_date, end_date, issues_number, issues_volume)

    print("py: Writing missings to file")
    with open(ROOT_DIR + "Deleted_Comics.txt", "wb") as deleted_file:
        for issue_id in non_retrieved:
            deleted_file.write((str(issue_id) + "\n").encode("utf-8"))

    print("py: Writing database to file")
    with open(out_file, "wb") as outfile:
        for line in format_database(issues_number, issues_volume, end_date):
            outfile.write(line.encode("utf-8"))

    print("py: Done! " + str(stats['new']) + " comics added to database! ("
          + str(stats['skipped']) + " skipped and " + str(stats['old'])
          + " comics already in database)")
    print("py: " + str(len(non_retrieved))
          + " comics in databased not retrieved in this round.")
    print("py: " + str(stats['updated']) + " comics updated in database.")
    print("py: Ids with error in server: " + ";".join(error_ids))
    print("py: " + str(duplicates))


if __name__ == "__main__":
    main(sys.argv)