228 lines
7.6 KiB
Executable File
228 lines
7.6 KiB
Executable File
"""
Ver 2.0

This script grabs the newest issues from ComicVine and 'appends' them to the
missing.mcl file contents.

Usage:
python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>
in_file: missing.mcl (i.e., your most recent version)
out_file: missing_<date> (or whatever you want to name it)
api_key: provided by ComicVine
start_date: the date the mcl file is synchronized with (YYYY-MM-DD)
end_date: today's date (YYYY-MM-DD)

Example:
python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17

Technical stuff:
The mcl file format contains a header followed by a list of volumes with
their respective issues/numbers.
<volume_id>;list of <issue_id>;list of <issue_num>
<volume_id>;list of <issue_id>;list of <issue_num>
<volume_id>;list of <issue_id>;list of <issue_num>
The lists are comma delimited. Commas followed immediately by a space are
not considered a delimiter. Some issues are numbered like "v. 1, no. 01".
If there is a space in the list of issue numbers, the entire list is
wrapped in double quotes.
Note: There is one volume (id: 77901) that has an issue number "1,5". This
can potentially wreak some havoc if not treated carefully.
"""
import requests
import sys
import os
import re
from datetime import date,datetime
# --- Default configuration, discovered from the "Update Missing" folder ---
# ROOT_DIR is "<parent of this script's folder>/Update Missing/"; it holds the
# dated "<YYYYMMDD>_latest" database files, the ".apikey" file and "log/".
ROOT_DIR = os.path.join(os.path.realpath(os.path.join(os.path.dirname(__file__), '..')), 'Update Missing/')

# Select the database file to update: among the entries whose name contains
# "_latest" and an 8-digit date stamp, keep the one with the newest stamp.
# (Previously `temp` was never assigned from the scan, so the date lookup
# below always failed on an empty name.)
temp = ""
old_date = ""
for i in os.listdir(ROOT_DIR):
    if i.find("_latest") == -1:
        continue
    stamp = re.search('[0-9]{8}', i)
    # YYYYMMDD stamps compare chronologically as plain strings.
    if stamp is not None and stamp.group() > old_date:
        temp = i
        old_date = stamp.group()
if not temp:
    sys.exit("py: No dated '_latest' file found in " + ROOT_DIR)

# Read the clock once so every derived date string agrees, even if the run
# straddles midnight.
today = date.today()

in_file = str(ROOT_DIR)+str(temp)
out_file = str(ROOT_DIR)+str(today.strftime("%Y%m%d")+"_latest.mcl")

# The API key is the first line of the ".apikey" file; surrounding whitespace
# (including a Windows "\r") is not part of the key.
with open(os.path.join(ROOT_DIR, ".apikey"), "r") as f:
    data = f.readlines()
if not data or not data[0].strip():
    sys.exit("py: No API key found in " + os.path.join(ROOT_DIR, ".apikey"))
api_key = str(data[0].strip())

# Query window: from the date stamped on the selected file up to today.
start_date = str(datetime.strptime(old_date,"%Y%m%d").strftime("%Y-%m-%d"))
end_date = str(today.strftime("%Y-%m-%d"))
today_date = str(today.strftime("%Y%m%d"))

# Per-day log file, opened in append mode and left open at module level.
# The log folder is created on first use instead of failing when it is absent.
os.makedirs(ROOT_DIR+'log/', exist_ok=True)
f1=open(ROOT_DIR+'log/'+today_date+'.log', 'a')
# Command-line override: when at least five positional arguments are given,
# they replace the input/output paths, the API key and the date range.
#   argv[1] in_file     missing.mcl
#   argv[2] out_file    updated_missing.mcl
#   argv[3] api_key     ComicVine API key
#   argv[4] start_date  start of the date range to search for new issues
#   argv[5] end_date    end of the date range to search for new issues
if len(sys.argv) >= 6:
    print("Using argvs")
    in_file, out_file, api_key, start_date, end_date = (
        str(argument) for argument in sys.argv[1:6]
    )
# --- Load the current database (mcl file) into memory ---
# issues_number: issue id -> issue number text
# issues_volume: issue id -> volume id
# cont:          how many issue ids appeared more than once in the file
issues_number = {}
issues_volume = {}
cont = 0

# In the list of issue numbers a comma is a delimiter only when it is NOT
# immediately followed by a space ("v. 1, no. 01" is a single issue number).
num_delimiter = re.compile(r',(?! )')

print("py: Reading in current database")
# The database is written as UTF-8 bytes, so read it back the same way rather
# than with the platform default encoding; `with` closes the file afterwards.
with open(in_file, "r", encoding="utf-8") as comiclist:
    # The first line is the header, not a volume record.
    next(comiclist, None)
    for line_no, line in enumerate(comiclist, start=2):
        line = line.rstrip("\r\n")
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        line_split = line.split(";")
        if len(line_split) < 3:
            raise ValueError("py: malformed line " + str(line_no) + " in " + in_file)
        volume_id = int(line_split[0])
        issue_split = line_split[1].split(",")
        # Double quotes wrapping the number list are kept verbatim so that a
        # record is written back exactly as it was read. The previous quote
        # check indexed one past the end of the field and sliced the field
        # list itself, so it could only ever raise.
        num_split = num_delimiter.split(line_split[2])
        if len(num_split) < len(issue_split):
            raise ValueError("py: line " + str(line_no) + " in " + in_file
                             + " has fewer issue numbers than issue ids")
        # zip() pairs ids with numbers positionally; surplus numbers (e.g. an
        # issue number such as "1,5" that itself contains a bare comma) are
        # ignored, as before.
        for issue_text, issue_num in zip(issue_split, num_split):
            known_id = int(issue_text)
            if known_id in issues_number:
                cont += 1
            issues_number[known_id] = issue_num
            issues_volume[known_id] = volume_id
# --- State for the ComicVine query loop ---
print("py: Querying ComicVine for new issues")

# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/50.0.2661.102 Safari/537.36'
    )
}

# Run statistics, all starting at zero.
new_comics_cont = old_comics_cont = updated_comics_cont = 0
deleted_comics_cont = comic_skip_cont = 0

# Paging and error-handling state: `offset` is the current position in the
# result list, `max` the loop bound, `limit` the page size; `skip` and `retry`
# are used by the error handling of the loop.
offset = skip = retry = 0
max = limit = 100
FindingError = False
ErrorIds = ""

# Starts as a copy of every known issue; entries are removed as the API
# returns them, leaving the issues that were not retrieved in this run.
non_retrieved_comics = dict(issues_number)
# Page through the ComicVine "issues" endpoint and merge the results into
# issues_number / issues_volume, keeping the run counters up to date.
# NOTE(review): the indentation of this loop is not visible here, and the code
# reads as if `try:` / `except:` / `else:` lines are missing (the "Error.
# Trying Again..." branch below can only make sense as a failure handler).
# Confirm the nesting against the original script before changing anything.
#
# The loop runs while `offset` is below `max`; `max` is replaced by the
# server-reported total on every successful response.
while offset < max:
# The request asks for `limit` issues starting at `offset`, restricted to
# issues last updated between start_date and end_date, sorted by id.
request_url = "https://comicvine.gamespot.com/api/issues/?api_key=" + api_key +"&limit=" + str(limit) + "&format=json&offset=" + str(offset) + "&field_list=id,issue_number,volume&filter=date_last_updated:" + start_date + "|" + end_date + "&sort=id"
#print request_url
r = requests.get(request_url, headers=headers)
json_obj = r.json()
max = json_obj['number_of_total_results']
# Progress line: current offset (capped at the total) / total.
print("py:" + str(min(offset,max)) + "/" + str(max) + " Since " + start_date)
for i in json_obj['results']:
volume_id = i['volume']['id']
issue_id = i['id']
# "," and ";" in the issue number are replaced by the placeholders ".&@1" and
# ".&@2", and line breaks are removed -- presumably because those characters
# are delimiters in the mcl file format.
num = i['issue_number'].replace(",",".&@1").replace(";",".&@2").replace("\n","").replace("\r","")
# Issue not in the database yet: count it as new and store it.
if not issues_number.__contains__(issue_id):
new_comics_cont += 1
issues_number[issue_id] = num
issues_volume[issue_id] = volume_id
# NOTE(review): presumably an `else:` belongs here -- an issue that is already
# known is removed from non_retrieved_comics, counted as old, and rewritten
# (counted as updated) when its number or volume changed.
del non_retrieved_comics[issue_id]
old_comics_cont += 1
if issues_number[issue_id] != num or issues_volume[issue_id] != volume_id:
updated_comics_cont += 1
issues_number[issue_id] = num
issues_volume[issue_id] = volume_id
# Advance past the page just read; `skip` adds one extra position when a
# failing entry was isolated by the error handling below.
offset += limit + skip
FindingError = False
if skip == 1:
# The id reported is the last id read plus one, i.e. an estimate.
print("py: Comic with error found, id= " + str(issue_id+1))
ErrorIds += ";"+ str(issue_id+1)
comic_skip_cont += 1
print("py: Continue loading comics now...")
# NOTE(review): which branch this assignment belongs to cannot be determined
# without the original indentation.
FindingError = True
# Restore the normal paging state.
skip = 0
limit = 100
retry = 0
# NOTE(review): the statements from here on look like the failure path
# (presumably the body of an `except:`): up to four retries first, then the
# page size is reduced by one per attempt with skip set to 1 to narrow down
# the failing entry; once `limit` reaches 0, or while FindingError is set,
# entries are stepped over one at a time and their estimated ids recorded in
# ErrorIds.
if retry < 4 and not FindingError:
print("py: Error. Trying Again...")
retry += 1
if not FindingError:
print("py: Finding Error in comic list: " + str(100-limit) + "%")
skip = 1
limit -= 1
if limit == 0 or FindingError:
print("py: Comic with error found, id= " + str(issue_id+offset))
FindingError = True
limit = 1
offset += 1
comic_skip_cont += 1
ErrorIds += ";"+ str(issue_id+offset)
# --- Regroup the flat issue maps by volume ---
# comics: volume id -> {issue id -> issue number text}.
# Each issue is stored under its volume; previously only the empty per-volume
# dict was created, so nothing was available to write out afterwards.
comics = {}
for issue_id in issues_number:
    comics.setdefault(issues_volume[issue_id], {})[issue_id] = issues_number[issue_id]

print("py: Writing missings to file")
# Opening in "wb" mode creates (or truncates) Deleted_Comics.txt; `with`
# makes sure the handle is closed again.
# NOTE(review): nothing is written to this file here -- confirm what content
# and format it is supposed to have before adding any.
with open(ROOT_DIR+"Deleted_Comics.txt", "wb") as deleted_file:
    # Every issue still listed in non_retrieved_comics counts as "deleted"
    # in the run summary.
    deleted_comics_cont += len(non_retrieved_comics)
# --- Write the merged database ---
# Format: a "Missing;<end_date>" header line, then one line per volume:
#   <volume_id>;<comma separated issue ids>;<comma separated issue numbers>
# Volumes, and the issues within each volume, are written in ascending id
# order.
print("py: Writing database to file")
# `with` guarantees the output is flushed and closed even if a write fails.
with open(out_file,"wb") as outfile:
    outfile.write(("Missing;" + end_date + "\n").encode())
    for volume_id in sorted(comics):
        volume_issues = comics[volume_id]
        ordered_ids = sorted(volume_issues)
        # join() puts a comma only BETWEEN entries; previously the number list
        # kept a trailing comma while the id list had it stripped.
        issues = ",".join(str(one_id) for one_id in ordered_ids)
        nums = ",".join(volume_issues[one_id] for one_id in ordered_ids)
        outfile.write((str(volume_id) + ";" + issues + ";" + nums + "\n").encode())
# --- Run summary on stdout ---
# Added / skipped / already-known counts.
print(
    "py: Done! ", new_comics_cont,
    " comics added to database! (", comic_skip_cont,
    " skipped and ", old_comics_cont,
    " comics already in database)",
    sep=""
)
# Issues in the database that the API did not return in this run.
print("py: ", deleted_comics_cont, " comics in databased not retrieved in this round.", sep="")
# Known issues whose number or volume changed.
print("py: ", updated_comics_cont, " comics updated in database.", sep="")
# ErrorIds is built as ";id;id..."; drop the leading separator.
print("py: Ids with error in server: ", ErrorIds[1:], sep="")
# Number of duplicate issue ids met while reading the input file.
print("py: ", cont, sep="")
#raw_input("Press Enter to continue...")