Added UpdateMissing script
This commit is contained in:
parent
3de3d1e564
commit
7fe9c3e500
BIN
Update Missing/.update.bat.un~
Normal file
BIN
Update Missing/.update.bat.un~
Normal file
Binary file not shown.
BIN
Update Missing/.update.sh.un~
Normal file
BIN
Update Missing/.update.sh.un~
Normal file
Binary file not shown.
125693
Update Missing/20220404.mcl
Normal file
125693
Update Missing/20220404.mcl
Normal file
File diff suppressed because one or more lines are too long
125693
Update Missing/20220404_latest.mcl
Normal file
125693
Update Missing/20220404_latest.mcl
Normal file
File diff suppressed because one or more lines are too long
11
Update Missing/update.bat
Normal file
11
Update Missing/update.bat
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
@echo off
rem Update the newest dated .mcl database with the ComicVine changes made
rem since the day before its date, writing the result to <today>.mcl.
rem
rem The ComicVine API key is read from the COMICVINE_API_KEY environment
rem variable so that it never has to be stored in this file.
setlocal enabledelayedexpansion

if not defined COMICVINE_API_KEY (
    echo Set the COMICVINE_API_KEY environment variable before running this script.
    exit /b 1
)

rem Find the newest snapshot. Only names made of exactly eight digits are
rem considered, so copies such as 20220404_latest.mcl never reach the numeric
rem comparison below.
set cnt=0
for /f "delims=" %%i in ('dir /b /a-d *.mcl 2^>nul ^| findstr /r /i /x "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.mcl"') do (
    set "num=%%~ni"
    if !num! GTR !cnt! set cnt=!num!
)
if "%cnt%"=="0" (
    echo No YYYYMMDD.mcl file found in the current directory.
    exit /b 1
)
set "olddate=%cnt%"

rem Dates are computed by PowerShell: slicing %%date%% depends on the regional
rem settings, and subtracting 1 from a YYYYMMDD number gives an invalid date on
rem the first day of a month.
set "today="
set "startdate="
for /f %%d in ('powershell -NoProfile -Command "(Get-Date).ToString('yyyy-MM-dd', [cultureinfo]::InvariantCulture)"') do set "today=%%d"
for /f %%d in ('powershell -NoProfile -Command "[datetime]::ParseExact('%olddate%', 'yyyyMMdd', [cultureinfo]::InvariantCulture).AddDays(-1).ToString('yyyy-MM-dd', [cultureinfo]::InvariantCulture)"') do set "startdate=%%d"
if not defined today (
    echo Could not determine today's date with PowerShell.
    exit /b 1
)
if not defined startdate (
    echo Could not compute the start date from %olddate%.mcl.
    exit /b 1
)

echo Updating %olddate%.mcl with changes from %startdate% to %today%
rem %today:-=% is today's date with the dashes removed (YYYYMMDD).
py -2 update_missing.py "%olddate%.mcl" "%today:-=%.mcl" "%COMICVINE_API_KEY%" %startdate% %today%
|
11
Update Missing/update.bat~
Normal file
11
Update Missing/update.bat~
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
@echo off
rem Update the newest dated .mcl database with the ComicVine changes made
rem since the day before its date, writing the result to <today>.mcl.
rem
rem The ComicVine API key is read from the COMICVINE_API_KEY environment
rem variable; a key must never be committed inside this file.
setlocal enabledelayedexpansion

if not defined COMICVINE_API_KEY (
    echo Set the COMICVINE_API_KEY environment variable before running this script.
    exit /b 1
)

rem Find the newest snapshot. Only names made of exactly eight digits are
rem considered, so copies such as 20220404_latest.mcl never reach the numeric
rem comparison below.
set cnt=0
for /f "delims=" %%i in ('dir /b /a-d *.mcl 2^>nul ^| findstr /r /i /x "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]\.mcl"') do (
    set "num=%%~ni"
    if !num! GTR !cnt! set cnt=!num!
)
if "%cnt%"=="0" (
    echo No YYYYMMDD.mcl file found in the current directory.
    exit /b 1
)
set "olddate=%cnt%"

rem Dates are computed by PowerShell: slicing %%date%% depends on the regional
rem settings, and subtracting 1 from a YYYYMMDD number gives an invalid date on
rem the first day of a month.
set "today="
set "startdate="
for /f %%d in ('powershell -NoProfile -Command "(Get-Date).ToString('yyyy-MM-dd', [cultureinfo]::InvariantCulture)"') do set "today=%%d"
for /f %%d in ('powershell -NoProfile -Command "[datetime]::ParseExact('%olddate%', 'yyyyMMdd', [cultureinfo]::InvariantCulture).AddDays(-1).ToString('yyyy-MM-dd', [cultureinfo]::InvariantCulture)"') do set "startdate=%%d"
if not defined today (
    echo Could not determine today's date with PowerShell.
    exit /b 1
)
if not defined startdate (
    echo Could not compute the start date from %olddate%.mcl.
    exit /b 1
)

echo Updating %olddate%.mcl with changes from %startdate% to %today%
rem %today:-=% is today's date with the dashes removed (YYYYMMDD).
py -2 update_missing.py "%olddate%.mcl" "%today:-=%.mcl" "%COMICVINE_API_KEY%" %startdate% %today%
|
17
Update Missing/update.sh
Normal file
17
Update Missing/update.sh
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
#!/bin/bash
# Refresh the newest "<YYYYMMDD>_latest.mcl" database with the ComicVine
# changes made since that date, then mark today's result as the new latest.
#
# The ComicVine API key is read from the COMICVINE_API_KEY environment
# variable so that it never has to be stored in this file.

if [ -z "${COMICVINE_API_KEY:-}" ]; then
    echo "Set the COMICVINE_API_KEY environment variable before running this script." >&2
    exit 1
fi

d=$(date +%Y%m%d)
echo "Todays date $d"

# Each run leaves another *_latest.mcl behind, so several files can match.
# Eight-digit dates sort chronologically as plain text; keep only the newest.
# An unmatched glob is passed through literally and rejected by sed.
fromdate=$(printf '%s\n' *_latest.mcl \
    | sed -nE 's/^([0-9]{8})_latest\.mcl$/\1/p' \
    | sort \
    | tail -n 1)
if [ -z "$fromdate" ]; then
    echo "No <YYYYMMDD>_latest.mcl file found in the current directory." >&2
    exit 1
fi
echo "Last date run $fromdate"

# update_missing.py documents its date arguments as YYYY-MM-DD.
start_date="${fromdate:0:4}-${fromdate:4:2}-${fromdate:6:2}"
end_date="${d:0:4}-${d:4:2}-${d:6:2}"

# Promote today's file only when the update really produced it.
if python2 "update_missing.py" "${fromdate}_latest.mcl" "${d}.mcl" \
        "$COMICVINE_API_KEY" "$start_date" "$end_date"; then
    cp "${d}.mcl" "${d}_latest.mcl"
else
    echo "update_missing.py failed; ${d}_latest.mcl was not updated." >&2
    exit 1
fi
|
||||||
|
|
17
Update Missing/update.sh~
Normal file
17
Update Missing/update.sh~
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
#!/bin/bash
# Refresh the newest "<YYYYMMDD>_latest.mcl" database with the ComicVine
# changes made since that date, then mark today's result as the new latest.
#
# The ComicVine API key is read from the COMICVINE_API_KEY environment
# variable; a key must never be committed inside this file.

if [ -z "${COMICVINE_API_KEY:-}" ]; then
    echo "Set the COMICVINE_API_KEY environment variable before running this script." >&2
    exit 1
fi

d=$(date +%Y%m%d)
echo "Todays date $d"

# Each run leaves another *_latest.mcl behind, so several files can match.
# Eight-digit dates sort chronologically as plain text; keep only the newest.
# An unmatched glob is passed through literally and rejected by sed.
fromdate=$(printf '%s\n' *_latest.mcl \
    | sed -nE 's/^([0-9]{8})_latest\.mcl$/\1/p' \
    | sort \
    | tail -n 1)
if [ -z "$fromdate" ]; then
    echo "No <YYYYMMDD>_latest.mcl file found in the current directory." >&2
    exit 1
fi
echo "Last date run $fromdate"

# update_missing.py documents its date arguments as YYYY-MM-DD.
start_date="${fromdate:0:4}-${fromdate:4:2}-${fromdate:6:2}"
end_date="${d:0:4}-${d:4:2}-${d:6:2}"

# "py -2" is the Windows launcher; a bash script needs the python2 executable.
# Promote today's file only when the update really produced it.
if python2 "update_missing.py" "${fromdate}_latest.mcl" "${d}.mcl" \
        "$COMICVINE_API_KEY" "$start_date" "$end_date"; then
    cp "${d}.mcl" "${d}_latest.mcl"
else
    echo "update_missing.py failed; ${d}_latest.mcl was not updated." >&2
    exit 1
fi
|
||||||
|
|
202
Update Missing/update_missing.py
Normal file
202
Update Missing/update_missing.py
Normal file
@ -0,0 +1,202 @@
|
|||||||
|
#!/usr/bin/env python

"""
Ver 2.0

Overview:

This script grabs the newest issues from ComicVine and 'appends' them to the
missing.mcl file contents.

usage:
    python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>

    in_file:    missing.mcl (i.e., your most recent version)
    out_file:   missing_<date> (or w/e you want to name it)
    api_key:    provided by ComicVine
    start_date: the date the mcl file is synched with (YYYY-MM-DD)
    end_date:   today's date (YYYY-MM-DD)

e.g.,
    python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17

Technical stuff:

The mcl file format contains a header followed by a list of volumes with
their respective issues/numbers.

    Missing;<date_of_last_update>
    <volume_id>;list of <issue_id>;list of <issue_num>
    <volume_id>;list of <issue_id>;list of <issue_num>
    ...
    <volume_id>;list of <issue_id>;list of <issue_num>

The lists are comma delimited. Commas followed immediately by a space are
not considered a delimiter. Some issues are numbered like "v. 1, no. 01".
If there is a space in the list of issue numbers, the entire list is
wrapped in double quotes.

Note: There is one volume (id: 77901) that has an issue number "1,5". This
can potentially wreak some havoc if not treated carefully.

Besides <out_file>, the script writes "Deleted_Comics.txt" (current
directory) with the ids already in the database that ComicVine did not
return for the requested date range.
"""

import io
import sys

try:
    import requests
except ImportError:  # reported by main() with a readable message
    requests = None

try:  # Python 2 names
    text_type = unicode
    read_input = raw_input
except NameError:  # Python 3 names
    text_type = str
    read_input = input

USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
PAGE_SIZE = 100
# Seconds before a stalled HTTP request is abandoned and retried.
REQUEST_TIMEOUT = 60


def _to_text(value):
    """Return *value* as text, decoding byte strings as UTF-8."""
    if isinstance(value, bytes):
        return value.decode("utf-8", "replace")
    return text_type(value)


def _unquote(field):
    """Drop one pair of double quotes wrapping *field*, if there is one."""
    if len(field) >= 2 and field[0] == '"' and field[-1] == '"':
        return field[1:-1]
    return field


def parse_line(line):
    """Split one database line into (volume_id, issue_ids, issue_numbers).

    Raises ValueError when an id is not an integer or when the line has
    fewer issue numbers than issue ids, and IndexError when it has fewer
    than three ';'-separated fields.
    """
    fields = line.rstrip("\r\n").split(";")
    volume_id = int(fields[0])
    issue_ids = [int(item) for item in _unquote(fields[1]).split(",")]
    # Files written by this script end the number list with a comma, so one
    # extra empty entry at the end is expected.
    numbers = _unquote(fields[2]).split(",")
    if len(numbers) < len(issue_ids):
        raise ValueError("%d issue ids but only %d issue numbers"
                         % (len(issue_ids), len(numbers)))
    return volume_id, issue_ids, numbers


def read_database(path):
    """Load an .mcl file.

    Returns (issues_number, issues_volume, duplicates): two dicts keyed by
    issue id and the count of ids that appeared more than once.  The first
    line (header) and blank lines are skipped.
    """
    issues_number = {}
    issues_volume = {}
    duplicates = 0
    with io.open(path, "r", encoding="utf-8") as handle:
        next(handle, None)  # header: "Missing;<date>"
        for line_no, line in enumerate(handle, 2):
            if not line.strip():
                continue
            try:
                volume_id, issue_ids, numbers = parse_line(line)
            except (ValueError, IndexError) as err:
                raise ValueError("%s, line %d: %s" % (path, line_no, err))
            for issue_id, number in zip(issue_ids, numbers):
                if issue_id in issues_number:
                    duplicates += 1
                issues_number[issue_id] = number
                issues_volume[issue_id] = volume_id
    return issues_number, issues_volume, duplicates


def encode_issue_number(raw):
    """Make an issue number safe for the ';' and ',' delimited file format."""
    return (text_type(raw)
            .replace(",", ".&@1")
            .replace(";", ".&@2")
            .replace("\n", "")
            .replace("\r", ""))


def fetch_updates(api_key, start_date, end_date, issues_number, issues_volume):
    """Merge every issue ComicVine updated in the date range into the dicts.

    *issues_number* and *issues_volume* are modified in place.  Returns a
    dict with the counters 'new', 'old', 'updated' and 'skipped', the dict
    'not_retrieved' (known issues the server did not return) and the list
    'error_ids' (estimated ids of issues the server could not deliver).

    When a page keeps failing, the page size is reduced one by one until
    the request succeeds; the item right after the working range is then
    skipped.  That heuristic is kept exactly as it was.
    """
    headers = {'User-Agent': USER_AGENT}
    stats = {'new': 0, 'old': 0, 'updated': 0, 'skipped': 0}
    not_retrieved = issues_number.copy()
    error_ids = []

    offset = 0
    total = PAGE_SIZE  # replaced by the real total after the first answer
    limit = PAGE_SIZE
    skip = 0
    retry = 0
    finding_error = False
    issue_id = 0  # last id seen; defined up front for the error messages

    while offset < total:
        try:
            request_url = "https://comicvine.gamespot.com/api/issues/?api_key=" + api_key + "&limit=" + str(limit) + "&format=json&offset=" + str(offset) + "&field_list=id,issue_number,volume&filter=date_last_updated:" + start_date + "|" + end_date + "&sort=id"

            response = requests.get(request_url, headers=headers,
                                    timeout=REQUEST_TIMEOUT)
            json_obj = response.json()

            total = json_obj['number_of_total_results']

            print (str(min(offset, total)) + "/" + str(total) + " Since " + start_date)

            for issue in json_obj['results']:
                volume_id = issue['volume']['id']
                issue_id = issue['id']
                num = encode_issue_number(issue['issue_number'])

                if issue_id not in issues_number:
                    stats['new'] += 1
                    issues_number[issue_id] = num
                    issues_volume[issue_id] = volume_id
                else:
                    # pop(): the id is already gone when a page is read again
                    # after a retry, or when it was added during this run.
                    not_retrieved.pop(issue_id, None)
                    stats['old'] += 1
                    if issues_number[issue_id] != num or issues_volume[issue_id] != volume_id:
                        stats['updated'] += 1
                        issues_number[issue_id] = num
                        issues_volume[issue_id] = volume_id

            offset += limit + skip

            finding_error = False

            if skip == 1:
                print ("Comic with error found, id= " + str(issue_id + 1))
                error_ids.append(str(issue_id + 1))
                stats['skipped'] += 1
                print ("Continue loading comics now...")
                finding_error = True

            skip = 0
            limit = PAGE_SIZE
            retry = 0

        except Exception:
            # Exception, not a bare except: Ctrl-C must still stop the run.
            if retry < 4 and not finding_error:
                print ("Error. Trying Again...")
                retry += 1
            else:
                if not finding_error:
                    print ("Finding Error in comic list: " + str(100 - limit) + "%")
                    skip = 1
                    limit -= 1

                if limit == 0 or finding_error:
                    print ("Comic with error found, id= " + str(issue_id + offset))
                    finding_error = True
                    limit = 1
                    offset += 1
                    stats['skipped'] += 1
                    error_ids.append(str(issue_id + offset))

    stats['not_retrieved'] = not_retrieved
    stats['error_ids'] = error_ids
    return stats


def group_by_volume(issues_number, issues_volume):
    """Return {volume_id: {issue_id: issue_number}} built from both dicts."""
    comics = {}
    for issue_id in issues_number:
        comics.setdefault(issues_volume[issue_id], {})[issue_id] = issues_number[issue_id]
    return comics


def format_volume_line(volume_id, issues):
    """Return the text line for one volume, issues sorted by id.

    NOTE: every issue number is followed by a comma, including the last
    one.  The script has always written the file this way, so the trailing
    comma is kept for compatibility with existing readers.
    """
    ordered = sorted(issues)
    id_list = u",".join(text_type(issue_id) for issue_id in ordered)
    num_list = u"".join(issues[issue_id] + u"," for issue_id in ordered)
    return u"%s;%s;%s\n" % (volume_id, id_list, num_list)


def write_deleted_ids(path, issue_ids):
    """Write one issue id per line to *path*."""
    with io.open(path, "wb") as handle:
        for issue_id in sorted(issue_ids):
            handle.write((u"%s\n" % issue_id).encode("utf-8"))


def write_database(path, end_date, comics):
    """Write the header and one line per volume (sorted by id) to *path*."""
    with io.open(path, "wb") as handle:
        header = u"Missing;" + _to_text(end_date) + u"\n"
        handle.write(header.encode("utf-8"))
        for volume_id in sorted(comics):
            line = format_volume_line(volume_id, comics[volume_id])
            handle.write(line.encode("utf-8", "ignore"))


def main(argv=None):
    """Run the update; returns the process exit code."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 6:
        print ("usage: python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>")
        return 1
    if requests is None:
        print ("The 'requests' package is required (pip install requests)")
        return 1

    in_file = str(argv[1])      # missing.mcl
    out_file = str(argv[2])     # updated_missing.mcl
    api_key = str(argv[3])      # ComicVine API key
    start_date = str(argv[4])   # start date range to search for new issues
    end_date = str(argv[5])     # end date range to search for new issues

    print ("Reading in current database")
    issues_number, issues_volume, cont = read_database(in_file)

    print ("Querying ComicVine for new issues")
    stats = fetch_updates(api_key, start_date, end_date,
                          issues_number, issues_volume)

    comics = group_by_volume(issues_number, issues_volume)

    print ("Writing missings to file")
    write_deleted_ids("Deleted_Comics.txt", stats['not_retrieved'])
    deleted_comics_cont = len(stats['not_retrieved'])

    print ("Writing database to file")
    write_database(out_file, end_date, comics)

    print ("Done! " + str(stats['new']) + " comics added to database! (" + str(stats['skipped']) + " skipped and " + str(stats['old']) + " comics already in database)")
    print (str(deleted_comics_cont) + " comics in databased not retrieved in this round.")
    print (str(stats['updated']) + " comics updated in database.")
    print ("Ids with error in server: " + ";".join(stats['error_ids']))
    # Number of issue ids that appeared more than once in the input file.
    print (cont)

    try:
        read_input("Press Enter to continue...")
    except EOFError:
        # No console attached (e.g. run from a scheduler): nothing to wait for.
        pass
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
Loading…
Reference in New Issue
Block a user