Added UpdateMissing script

This commit is contained in:
2022-07-08 10:29:37 +02:00
parent 3de3d1e564
commit 7fe9c3e500
9 changed files with 251644 additions and 0 deletions
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+11
View File
@@ -0,0 +1,11 @@
@echo off
rem Brings the newest YYYYMMDD.mcl database in this folder up to today's date
rem by calling update_missing.py, which writes a new YYYYMMDD.mcl for today.
rem
rem Requirements:
rem   * COMICVINE_API_KEY must hold your ComicVine API key. The key is read
rem     from the environment so that it never has to be stored in this file.
rem   * PowerShell is used for the date handling, so the script no longer
rem     depends on the regional layout of %date%.
setlocal enabledelayedexpansion

if not defined COMICVINE_API_KEY (
    echo COMICVINE_API_KEY is not set. 1>&2
    exit /b 1
)

rem Newest database = highest file name made of exactly eight digits.
rem Other *.mcl files are ignored so they cannot win the comparison.
set cnt=0
for %%i in (*.mcl) do (
    set "num=%%~ni"
    echo %%~ni| findstr /r /x "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" >nul && if !num! GTR !cnt! set cnt=!num!
)
if %cnt%==0 (
    echo No YYYYMMDD.mcl database was found in this folder. 1>&2
    exit /b 1
)

rem Today's date as YYYYMMDD, independent of the regional settings.
set "today="
for /f "usebackq delims=" %%d in (`powershell -NoProfile -Command "Get-Date -Format yyyyMMdd"`) do set "today=%%d"
if not defined today (
    echo Could not determine today's date. 1>&2
    exit /b 1
)

rem The query starts one day before the database date. Real calendar
rem arithmetic is required: subtracting 1 from 20220701 gives 20220700.
set "startdate="
for /f "usebackq delims=" %%d in (`powershell -NoProfile -Command "[datetime]::ParseExact('%cnt%','yyyyMMdd',[cultureinfo]::InvariantCulture).AddDays(-1).ToString('yyyy-MM-dd',[cultureinfo]::InvariantCulture)"`) do set "startdate=%%d"
if not defined startdate (
    echo Could not compute the start date from %cnt%. 1>&2
    exit /b 1
)

set "enddate=%today:~0,4%-%today:~4,2%-%today:~6,2%"
py -2 update_missing.py %cnt%.mcl %today%.mcl "%COMICVINE_API_KEY%" %startdate% %enddate%
exit /b %errorlevel%
+11
View File
@@ -0,0 +1,11 @@
@echo off
rem Brings the newest YYYYMMDD.mcl database in this folder up to today's date
rem by calling update_missing.py, which writes a new YYYYMMDD.mcl for today.
rem
rem Requirements:
rem   * COMICVINE_API_KEY must hold your ComicVine API key. Credentials do not
rem     belong in version control: a key that was ever committed in this file
rem     must be treated as leaked and regenerated on ComicVine.
rem   * PowerShell is used for the date handling, so the script no longer
rem     depends on the regional layout of %date%.
setlocal enabledelayedexpansion

if not defined COMICVINE_API_KEY (
    echo COMICVINE_API_KEY is not set. 1>&2
    exit /b 1
)

rem Newest database = highest file name made of exactly eight digits.
rem Other *.mcl files are ignored so they cannot win the comparison.
set cnt=0
for %%i in (*.mcl) do (
    set "num=%%~ni"
    echo %%~ni| findstr /r /x "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" >nul && if !num! GTR !cnt! set cnt=!num!
)
if %cnt%==0 (
    echo No YYYYMMDD.mcl database was found in this folder. 1>&2
    exit /b 1
)

rem Today's date as YYYYMMDD, independent of the regional settings.
set "today="
for /f "usebackq delims=" %%d in (`powershell -NoProfile -Command "Get-Date -Format yyyyMMdd"`) do set "today=%%d"
if not defined today (
    echo Could not determine today's date. 1>&2
    exit /b 1
)

rem The query starts one day before the database date. Real calendar
rem arithmetic is required: subtracting 1 from 20220701 gives 20220700.
set "startdate="
for /f "usebackq delims=" %%d in (`powershell -NoProfile -Command "[datetime]::ParseExact('%cnt%','yyyyMMdd',[cultureinfo]::InvariantCulture).AddDays(-1).ToString('yyyy-MM-dd',[cultureinfo]::InvariantCulture)"`) do set "startdate=%%d"
if not defined startdate (
    echo Could not compute the start date from %cnt%. 1>&2
    exit /b 1
)

set "enddate=%today:~0,4%-%today:~4,2%-%today:~6,2%"
py -2 update_missing.py %cnt%.mcl %today%.mcl "%COMICVINE_API_KEY%" %startdate% %enddate%
exit /b %errorlevel%
+17
View File
@@ -0,0 +1,17 @@
#!/bin/bash
# Brings the newest YYYYMMDD_latest.mcl database up to today's date with
# update_missing.py, then stores the result as today's *_latest.mcl.
#
# The ComicVine API key is read from COMICVINE_API_KEY so that it never has
# to be written into this file.
set -euo pipefail

: "${COMICVINE_API_KEY:?Set COMICVINE_API_KEY to your ComicVine API key}"

d=$(date +%Y%m%d)
echo "Todays date $d"

# Every run leaves one more *_latest.mcl behind, so several may exist.
# Globs expand in sorted order, which makes the last match the newest date.
shopt -s nullglob
candidates=( [0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_latest.mcl )
if (( ${#candidates[@]} == 0 )); then
    echo "No YYYYMMDD_latest.mcl database found in $PWD" >&2
    exit 1
fi
newest=${candidates[${#candidates[@]}-1]}
fromdate=${newest%_latest.mcl}
echo "Last date run $fromdate"

# update_missing.py documents its date arguments as YYYY-MM-DD.
start_date="${fromdate:0:4}-${fromdate:4:2}-${fromdate:6:2}"
end_date="${d:0:4}-${d:4:2}-${d:6:2}"

# With 'set -e' the copy below only happens when the update succeeded.
python2 "update_missing.py" "${fromdate}_latest.mcl" "${d}.mcl" "$COMICVINE_API_KEY" "$start_date" "$end_date"
cp "${d}.mcl" "${d}_latest.mcl"
+17
View File
@@ -0,0 +1,17 @@
#!/bin/bash
# Brings the newest YYYYMMDD_latest.mcl database up to today's date with
# update_missing.py, then stores the result as today's *_latest.mcl.
#
# The ComicVine API key is read from COMICVINE_API_KEY. Credentials do not
# belong in version control: a key that was ever committed in this file must
# be treated as leaked and regenerated on ComicVine.
set -euo pipefail

: "${COMICVINE_API_KEY:?Set COMICVINE_API_KEY to your ComicVine API key}"

d=$(date +%Y%m%d)
echo "Todays date $d"

# Every run leaves one more *_latest.mcl behind, so several may exist.
# Globs expand in sorted order, which makes the last match the newest date.
shopt -s nullglob
candidates=( [0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_latest.mcl )
if (( ${#candidates[@]} == 0 )); then
    echo "No YYYYMMDD_latest.mcl database found in $PWD" >&2
    exit 1
fi
newest=${candidates[${#candidates[@]}-1]}
fromdate=${newest%_latest.mcl}
echo "Last date run $fromdate"

# update_missing.py documents its date arguments as YYYY-MM-DD.
start_date="${fromdate:0:4}-${fromdate:4:2}-${fromdate:6:2}"
end_date="${d:0:4}-${d:4:2}-${d:6:2}"

# With 'set -e' the copy below only happens when the update succeeded.
py -2 "update_missing.py" "${fromdate}_latest.mcl" "${d}.mcl" "$COMICVINE_API_KEY" "$start_date" "$end_date"
cp "${d}.mcl" "${d}_latest.mcl"
+202
View File
@@ -0,0 +1,202 @@
#!/usr/bin/env python
"""
Ver 2.0
Overview:
This script grabs the newest issues from ComicVine and 'appends' them to the
missing.mcl file contents.
usage:
python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>
in_file: missing.mcl (i.e., your most recent version)
out_file: missing_<date> (or w/e you want to name it)
api_key: provided by ComicVine
start_date: the date the mcl file is synched with (YYYY-MM-DD)
end_date: today's date (YYYY-MM-DD)
e.g.,
python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17
Technical stuff:
The mcl file format contains a header followed by a list of volumes with
their respective issues/numbers.
Missing;<date_of_last_update>
<volume_id>;list of <issue_id>;list of <issue_num>
<volume_id>;list of <issue_id>;list of <issue_num>
...
<volume_id>;list of <issue_id>;list of <issue_num>
The lists are comma delimited. Commas followed immediately by a space are
not considered a delimiter. Some issues are numbered like "v. 1, no. 01".
If there is a space in the list of issue numbers, the entire list is
wrapped in double quotes.
Note: There is one volume (id: 77901) that has an issue number "1,5". This
can potentially wreak some havoc if not treated carefully.
"""
import io
import re
import sys

import requests
# The code below runs unchanged on Python 2 (which the bundled launchers
# invoke) and on Python 3.

PAGE_SIZE = 100             # records requested per ComicVine call
MAX_ATTEMPTS = 5            # tries for a full page before hunting a bad record
MAX_CONSECUTIVE_SKIPS = 10  # rounds without any data before giving up
REQUEST_TIMEOUT = 60        # seconds; without it a stalled server hangs the run
API_URL = "https://comicvine.gamespot.com/api/issues/"
HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

# A comma separates list entries unless a space follows it ("v. 1, no. 01").
NUMBER_DELIMITER = re.compile(r",(?! )")

text_type = type(u"")       # unicode on Python 2, str on Python 3


def split_issue_numbers(numbers_text, expected):
    """Split the issue-number field of a volume line into single numbers.

    Follows the file format described in the module docstring: the list may
    be wrapped in double quotes, and a comma followed by a space belongs to
    the number instead of delimiting it. Lists written by this script end
    with a comma; that terminator is ignored.

    numbers_text: raw third field of the line.
    expected: number of issue ids on the same line. When the documented
        split yields fewer entries than that, the plain comma split the
        script used previously is returned instead, so files it accepted
        before still load.

    NOTE(review): an issue number such as "1,5" (volume 77901) cannot be told
    apart from two numbers and still shifts the numbers that follow it.
    """
    body = numbers_text[:-1] if numbers_text.endswith(",") else numbers_text
    if len(body) >= 2 and body[0] == '"' and body[-1] == '"':
        body = body[1:-1]
    numbers = NUMBER_DELIMITER.split(body)
    if len(numbers) < expected:
        numbers = numbers_text.split(",")
    return numbers


def parse_volume_line(line):
    """Parse one "<volume_id>;<issue ids>;<issue numbers>" line.

    Returns (volume_id, [(issue_id, issue_number), ...]), or None for a blank
    line. Raises ValueError when the line has fewer than three fields, when
    an id is not an integer, or when there are fewer numbers than issue ids.
    """
    line = line.rstrip("\r\n")
    if not line.strip():
        return None
    fields = line.split(";")
    if len(fields) < 3:
        raise ValueError("malformed volume line: %r" % (line,))
    volume_id = int(fields[0])
    issue_ids = [int(token) for token in fields[1].split(",")]
    numbers = split_issue_numbers(fields[2], len(issue_ids))
    if len(numbers) < len(issue_ids):
        raise ValueError("fewer issue numbers than issue ids: %r" % (line,))
    # zip() drops surplus numbers, e.g. the empty entry after a final comma.
    return volume_id, list(zip(issue_ids, numbers))


def load_database(lines):
    """Build the issue tables from the text lines of an .mcl file.

    The first line is the "Missing;<date>" header and is skipped.

    Returns (issues_number, issues_volume, duplicates): two dicts keyed by
    issue id, and the count of issue ids that occurred more than once (the
    last occurrence wins).
    """
    issues_number = {}
    issues_volume = {}
    duplicates = 0
    for index, line in enumerate(lines):
        if index == 0:
            continue
        parsed = parse_volume_line(line)
        if parsed is None:
            continue
        volume_id, issues = parsed
        for issue_id, number in issues:
            if issue_id in issues_number:
                duplicates += 1
            issues_number[issue_id] = number
            issues_volume[issue_id] = volume_id
    return issues_number, issues_volume, duplicates


def encode_issue_number(value):
    """Turn an API issue number into text that is safe inside an .mcl line.

    "," and ";" delimit the file format, so they are replaced by the markers
    ".&@1" and ".&@2"; line breaks are removed.
    """
    text = text_type(value)
    return text.replace(",", ".&@1").replace(";", ".&@2").replace("\n", "").replace("\r", "")


def parse_page(payload):
    """Convert one decoded API reply into (total, records).

    records is a list of (issue_id, volume_id, issue_number) tuples. A
    malformed entry raises (KeyError/TypeError) before anything is returned,
    so a broken page never gets merged half-way.
    """
    total = payload['number_of_total_results']
    records = [
        (item['id'], item['volume']['id'], encode_issue_number(item['issue_number']))
        for item in payload['results']
    ]
    return total, records


def fetch_page(api_key, start_date, end_date, offset, limit):
    """Request `limit` issues starting at `offset`, updated in the date range.

    Returns what parse_page() returns; raises on network errors, on replies
    that are not JSON and on malformed entries.
    """
    request_url = (
        API_URL + "?api_key=" + api_key + "&limit=" + str(limit)
        + "&format=json&offset=" + str(offset)
        + "&field_list=id,issue_number,volume&filter=date_last_updated:"
        + start_date + "|" + end_date + "&sort=id"
    )
    response = requests.get(request_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    return parse_page(response.json())


def try_fetch(fetch, offset, limit):
    """Call fetch(offset, limit); return its result, or None if it raised."""
    try:
        return fetch(offset, limit)
    except Exception as err:  # best effort: any failure means "page unusable"
        # Only the exception type is shown: the message of a requests error
        # contains the request URL and with it the API key.
        print("Request failed (offset " + str(offset) + ", limit " + str(limit)
              + "): " + type(err).__name__)
        return None


def download_updates(fetch, page_size=PAGE_SIZE, attempts=MAX_ATTEMPTS,
                     progress_suffix=""):
    """Download every record, skipping the ones the server cannot deliver.

    fetch: callable (offset, limit) -> (total, records) that raises on
        failure.
    page_size: records requested per call.
    attempts: tries for a full page before a bad record is assumed.
    progress_suffix: text appended to the "<offset>/<total>" progress line.

    When a full page keeps failing, the longest prefix of it that still loads
    is found by bisection; the record right after that prefix is retried on
    its own and skipped if it fails again.

    Returns (records, skipped_offsets).
    Raises RuntimeError after MAX_CONSECUTIVE_SKIPS rounds in a row in which
    nothing could be loaded, because that indicates an outage or a rejected
    request rather than single bad records.
    """
    records = []
    skipped = []
    offset = 0
    total = page_size  # provisional until the first reply reports the total
    barren_rounds = 0
    while offset < total:
        page = None
        for attempt in range(attempts):
            page = try_fetch(fetch, offset, page_size)
            if page is not None:
                break
            if attempt + 1 < attempts:
                print("Error. Trying Again...")
        if page is not None:
            total = page[0]
            print(str(min(offset, total)) + "/" + str(total) + progress_suffix)
            records.extend(page[1])
            offset += page_size
            barren_rounds = 0
            continue

        # Invariant: the first `good` records load, the first `failing` + 1
        # records do not.
        good = 0
        failing = page_size - 1
        while good < failing:
            probe = (good + failing + 1) // 2
            print("Finding Error in comic list: trying first " + str(probe)
                  + " of " + str(page_size))
            partial = try_fetch(fetch, offset, probe)
            if partial is None:
                failing = probe - 1
            else:
                good = probe
                page = partial
        bad_offset = offset + good
        if page is not None:
            total = page[0]
            records.extend(page[1])

        # Fetch the suspect alone, so a transient failure loses no record.
        single = try_fetch(fetch, bad_offset, 1)
        if single is not None:
            total = single[0]
            records.extend(single[1])
        else:
            print("Comic with error found, offset= " + str(bad_offset))
            skipped.append(bad_offset)

        if page is None and single is None:
            barren_rounds += 1
            if barren_rounds > MAX_CONSECUTIVE_SKIPS:
                raise RuntimeError(
                    "no data received for " + str(barren_rounds)
                    + " consecutive records; check the connection and API key")
        else:
            barren_rounds = 0
        offset = bad_offset + 1
        print("Continue loading comics now...")
    return records, skipped


def apply_updates(issues_number, issues_volume, records):
    """Merge downloaded records into the issue tables (modified in place).

    Returns (added, known, changed, not_retrieved):
        added: records whose issue id was not in the database yet,
        known: records whose issue id already was,
        changed: known records whose number or volume differed,
        not_retrieved: sorted ids that were in the database before the merge
            and did not show up in `records`.
    """
    not_retrieved = set(issues_number)
    added = 0
    known = 0
    changed = 0
    for issue_id, volume_id, number in records:
        if issue_id not in issues_number:
            added += 1
        else:
            known += 1
            # discard() tolerates an id that the server returned twice.
            not_retrieved.discard(issue_id)
            if issues_number[issue_id] != number or issues_volume[issue_id] != volume_id:
                changed += 1
        issues_number[issue_id] = number
        issues_volume[issue_id] = volume_id
    return added, known, changed, sorted(not_retrieved)


def group_by_volume(issues_number, issues_volume):
    """Return {volume_id: {issue_id: issue_number}} built from both tables."""
    comics = {}
    for issue_id, number in issues_number.items():
        comics.setdefault(issues_volume[issue_id], {})[issue_id] = number
    return comics


def format_volume_line(volume_id, issues):
    """Render one volume as an .mcl line, issues ordered by id.

    NOTE(review): the number list keeps the trailing comma and is never
    wrapped in quotes, exactly as this script has always written it; confirm
    with the consumer of the file before changing either.
    """
    ordered = sorted(issues)
    ids_text = ",".join(str(issue_id) for issue_id in ordered)
    numbers_text = "".join(issues[issue_id] + "," for issue_id in ordered)
    return u"{0};{1};{2}\n".format(volume_id, ids_text, numbers_text)


def pause(message):
    """Wait for Enter; keeps a double-clicked console window open."""
    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input      # Python 3
    try:
        ask(message)
    except EOFError:
        pass             # no console attached (e.g. run from a scheduler)


def main(argv):
    """Run the update described in the module docstring; return an exit code."""
    if len(argv) < 6:
        print("usage: python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>")
        return 2
    in_file, out_file, api_key, start_date, end_date = argv[1:6]

    print("Reading in current database")
    with io.open(in_file, "r", encoding="utf-8") as comiclist:
        issues_number, issues_volume, duplicates = load_database(comiclist)

    print("Querying ComicVine for new issues")

    def fetch(offset, limit):
        return fetch_page(api_key, start_date, end_date, offset, limit)

    try:
        records, skipped = download_updates(
            fetch, progress_suffix=" Since " + start_date)
    except RuntimeError as err:
        # Nothing is written: a file stamped with end_date but lacking the
        # updates would make the next run skip them for good.
        sys.stderr.write("Aborting: " + str(err) + "\n")
        return 1

    added, known, changed, not_retrieved = apply_updates(
        issues_number, issues_volume, records)
    comics = group_by_volume(issues_number, issues_volume)

    print("Writing missings to file")
    with io.open("Deleted_Comics.txt", "w", encoding="utf-8", newline="\n") as deleted_file:
        for issue_id in not_retrieved:
            deleted_file.write(u"{0}\n".format(issue_id))

    print("Writing database to file")
    with io.open(out_file, "w", encoding="utf-8", errors="ignore", newline="\n") as outfile:
        outfile.write(u"Missing;{0}\n".format(end_date))
        for volume_id in sorted(comics):
            outfile.write(format_volume_line(volume_id, comics[volume_id]))

    print("Done! " + str(added) + " comics added to database! (" + str(len(skipped)) + " skipped and " + str(known) + " comics already in database)")
    print(str(len(not_retrieved)) + " comics in databased not retrieved in this round.")
    print(str(changed) + " comics updated in database.")
    # Result offsets, not ids: the id of a record that cannot be loaded is
    # unknown.
    print("Offsets with error in server: " + ";".join(str(o) for o in skipped))
    print(duplicates)  # issue ids listed more than once in the input file
    pause("Press Enter to continue...")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))