2022-07-15 09:21:05 +02:00
#!/usr/bin/python2
2022-07-08 10:29:37 +02:00
"""
Ver 2.0
Overview:
    This script grabs the newest issues from ComicVine and 'appends' them to
    the missing.mcl file contents.
usage:
    python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>
        in_file    : missing.mcl (i.e., your most recent version)
        out_file   : missing_<date> (or whatever you want to name it)
        api_key    : provided by ComicVine
        start_date : the date the mcl file is synched with (YYYY-MM-DD)
        end_date   : today's date (YYYY-MM-DD)
    e.g.,
        python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17
    When fewer than five arguments are given, the values derived from the
    "Update Missing" folder (latest database, .apikey file, today's date) are
    used instead.
Technical stuff:
    The mcl file format contains a header followed by a list of volumes with
    their respective issues/numbers.
        Missing;<date_of_last_update>
        <volume_id>;list of <issue_id>;list of <issue_num>
        <volume_id>;list of <issue_id>;list of <issue_num>
        ...
        <volume_id>;list of <issue_id>;list of <issue_num>
    The lists are comma delimited. Commas followed immediately by a space are
    not considered a delimiter. Some issues are numbered like "v. 1, no. 01".
    If there is a space in the list of issue numbers, the entire list is
    wrapped in double quotes.
    Note: There is one volume (id: 77901) that has an issue number "1,5". This
    can potentially wreak some havoc if not treated carefully.
"""
import requests
import sys
2022-07-15 09:21:05 +02:00
import os
import re
from datetime import date , datetime
2022-07-15 10:43:24 +02:00
# ---------------------------------------------------------------------------
# Default run parameters.
#
# Everything is derived from the "Update Missing" folder that lives in the
# parent of this script's directory:
#   * the current database is the entry whose name contains "_latest" and an
#     8-digit YYYYMMDD stamp,
#   * the ComicVine API key is the first line of the ".apikey" file,
#   * the query window runs from the database's date stamp to today.
# ---------------------------------------------------------------------------
ROOT_DIR = os.path.join(os.path.realpath(os.path.join(os.path.dirname(__file__), '..')), 'Update Missing/')

_DATE_STAMP = re.compile('[0-9]{8}')

# os.listdir() returns entries in arbitrary order, so when several "_latest"
# files are present the one carrying the greatest date stamp is chosen
# (YYYYMMDD strings sort chronologically).
_dated_latest = sorted(
    (name for name in os.listdir(ROOT_DIR)
     if "_latest" in name and _DATE_STAMP.search(name)),
    key=lambda name: _DATE_STAMP.search(name).group(),
)
if not _dated_latest:
    # Fail with an explicit message instead of an AttributeError on None.
    raise SystemExit("py: no '<YYYYMMDD>_latest' database found in " + ROOT_DIR)
temp = _dated_latest[-1]
old_date = _DATE_STAMP.search(temp).group()

# Read the clock once so every derived value refers to the same day.
_today = date.today()
today_date = str(_today.strftime("%Y%m%d"))

in_file = str(ROOT_DIR) + str(temp)
out_file = str(ROOT_DIR) + today_date + "_latest.mcl"

data = []
with open(ROOT_DIR + "/.apikey", "r") as f:
    data = f.readlines()
if not data or not data[0].strip():
    raise SystemExit("py: " + ROOT_DIR + "/.apikey is empty; expected the ComicVine API key on its first line")
# strip() also removes a stray "\r" or surrounding blanks, which would
# otherwise end up inside the request URL.
api_key = str(data[0].strip())

start_date = str(datetime.strptime(old_date, "%Y%m%d").strftime("%Y-%m-%d"))
end_date = str(_today.strftime("%Y-%m-%d"))

# NOTE(review): f1 is never written to or closed anywhere in this file; the
# open() is kept so the per-day log file is still created as before.
f1 = open(ROOT_DIR + 'log/' + today_date + '.log', 'a')
2022-07-15 09:21:05 +02:00
# Command-line override: when at least five arguments are supplied they
# replace the defaults, in this order:
#   1. in_file    - existing database (missing.mcl)
#   2. out_file   - database to write (updated_missing.mcl)
#   3. api_key    - ComicVine API key
#   4. start_date - start of the date range to search for new issues
#   5. end_date   - end of the date range to search for new issues
if len(sys.argv) > 5:
    print("Using argvs")
    in_file, out_file, api_key, start_date, end_date = [
        str(argument) for argument in sys.argv[1:6]
    ]
2022-07-08 10:29:37 +02:00
# ---------------------------------------------------------------------------
# Load the existing database (in_file) into two maps keyed by issue id:
#   issues_number[issue_id] -> issue number text exactly as stored in the file
#   issues_volume[issue_id] -> id of the volume line the issue was listed on
# `cont` counts issue ids that appear more than once in the file; a later
# occurrence overwrites the earlier one.
# ---------------------------------------------------------------------------
issues_number = {}
issues_volume = {}
cont = 0

print("py: Reading in current database")


def _unquote(field):
    """Return *field* without one pair of wrapping double quotes, if present."""
    if len(field) >= 2 and field[0] == '"' and field[-1] == '"':
        return field[1:-1]
    return field


# The database is written by this script as UTF-8 bytes, so it is read back as
# UTF-8 rather than in the platform's default encoding.
with open(in_file, "r", encoding="utf-8") as comiclist:
    next(comiclist, None)  # first line is the "Missing;<date>" header
    for line in comiclist:
        line = line.rstrip("\r\n")
        if not line:
            continue  # tolerate blank lines instead of failing on int("")
        line_split = line.split(";")
        volume_id = int(line_split[0])
        # A list field may be wrapped in double quotes (see the module
        # docstring); drop the quotes before splitting on the delimiter.
        issue_split = _unquote(line_split[1]).split(",")
        num_split = _unquote(line_split[2]).split(",")
        for i, _issue_text in enumerate(issue_split):
            _issue_key = int(_issue_text)
            if _issue_key in issues_number:
                cont += 1
            issues_number[_issue_key] = num_split[i]
            issues_volume[_issue_key] = volume_id
2022-10-05 16:31:47 +02:00
# ---------------------------------------------------------------------------
# Page through every ComicVine issue whose date_last_updated lies between
# start_date and end_date (sorted by id) and merge it into issues_number /
# issues_volume.
#
# Failure handling, as implemented below:
#   1. A failing request is retried up to 4 times.
#   2. After that the page size (`limit`) is reduced by one per attempt until
#      a request succeeds; the record right after that page is assumed to be
#      the one the server cannot deliver and is skipped (`skip`).
#   3. While `FindingError` is set (a record has just been skipped) the next
#      failure skips the record at `offset` directly, without retrying or
#      shrinking again.
# Ids reported for skipped records are estimates; the record itself could not
# be fetched.
# ---------------------------------------------------------------------------
print("py: Querying ComicVine for new issues")

headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

new_comics_cont = 0      # issues that were not in the database yet
old_comics_cont = 0      # issues already present in the database
updated_comics_cont = 0  # present issues whose number or volume changed
deleted_comics_cont = 0  # incremented later, when non-retrieved ids are written
comic_skip_cont = 0      # records skipped because the server kept failing

offset = 0
total_results = 100      # placeholder until the first response reports the real total
limit = 100              # current page size; 100 is the normal value
skip = 0                 # 1 while a failing record after the current page must be skipped
retry = 0
ErrorIds = ""            # ";"-prefixed list of estimated ids of skipped records
issue_id = None          # id of the last issue processed; None until one was seen
non_retrieved_comics = issues_number.copy()
FindingError = False

while offset < total_results:
    try:
        request_url = (
            "https://comicvine.gamespot.com/api/issues/?api_key=" + api_key
            + "&limit=" + str(limit)
            + "&format=json&offset=" + str(offset)
            + "&field_list=id,issue_number,volume&filter=date_last_updated:"
            + start_date + "|" + end_date
            + "&sort=id"
        )
        r = requests.get(request_url, headers=headers)
        json_obj = r.json()
        total_results = json_obj['number_of_total_results']
        print("py: " + str(min(offset, total_results)) + "/" + str(total_results) + " Since " + start_date)

        # Extract the whole page before touching any state, so a malformed
        # record cannot leave the page half-applied when it is retried.
        # "," and ";" delimit the .mcl format, so inside issue numbers they
        # are replaced by the placeholders ".&@1" / ".&@2".
        page = [
            (
                entry['id'],
                entry['volume']['id'],
                entry['issue_number'].replace(",", ".&@1").replace(";", ".&@2").replace("\n", "").replace("\r", ""),
            )
            for entry in json_obj['results']
        ]

        for issue_id, volume_id, num in page:
            if issue_id not in issues_number:
                new_comics_cont += 1
                issues_number[issue_id] = num
                issues_volume[issue_id] = volume_id
            else:
                # pop() instead of del: an id can be returned more than once,
                # and that must not be mistaken for a server error.
                non_retrieved_comics.pop(issue_id, None)
                old_comics_cont += 1
                if issues_number[issue_id] != num or issues_volume[issue_id] != volume_id:
                    updated_comics_cont += 1
                    issues_number[issue_id] = num
                    issues_volume[issue_id] = volume_id

        # Advance past this page, plus the failing record right after it when
        # the page had to be shrunk to get a response.
        offset += limit + skip
        FindingError = False
        if skip == 1:
            # Results are sorted by id, so the skipped record is reported as
            # "last good id + 1" (an estimate).
            print("py: Comic with error found, id=" + str(issue_id + 1))
            ErrorIds += ";" + str(issue_id + 1)
            comic_skip_cont += 1
            print("py: Continue loading comics now...")
            FindingError = True
        skip = 0
        limit = 100
        retry = 0
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C still stops the run.
        if retry < 4 and not FindingError:
            print("py: Error. Trying Again...")
            retry += 1
        else:
            if not FindingError:
                print("py: Finding Error in comic list: " + str(100 - limit) + "%")
            skip = 1
            limit -= 1
            if limit == 0 or FindingError:
                if issue_id is None:
                    # Nothing has ever been fetched successfully: surface the
                    # real failure instead of skipping through the result set.
                    raise
                # NOTE(review): the reported value mixes an issue id with a
                # result offset, and the printed and recorded values differ
                # by one; kept as in the original until the intended formula
                # is confirmed.
                print("py: Comic with error found, id=" + str(issue_id + offset))
                FindingError = True
                limit = 1
                offset += 1
                # The failing record has just been stepped over, so the next
                # successful page must not skip a further, untested record.
                skip = 0
                comic_skip_cont += 1
                ErrorIds += ";" + str(issue_id + offset)
# Regroup the flat issue maps by volume:
#   comics[volume_id][issue_id] -> issue number text
comics = {}
for issue_id, issue_num in issues_number.items():
    comics.setdefault(issues_volume[issue_id], {})[issue_id] = issue_num
2022-10-05 16:31:47 +02:00
# Write the ids that were in the database but were not returned by this
# round's query to Deleted_Comics.txt, one id per line, and count them.
print("py: Writing missings to file")

# Binary mode keeps "\n" line endings on every platform; the with-block closes
# the file even if a write fails.
with open(ROOT_DIR + "Deleted_Comics.txt", "wb") as deleted_file:
    deleted_file.writelines(
        (str(issue_id) + "\n").encode() for issue_id in non_retrieved_comics
    )
deleted_comics_cont += len(non_retrieved_comics)
2022-10-05 16:31:47 +02:00
# Write the merged database to out_file:
#   Missing;<end_date>
#   <volume_id>;<issue ids, comma separated>;<issue numbers, each followed by ",">
# Volumes and issues are written in ascending id order.
print("py: Writing database to file")

# The with-block closes the file even if a write fails part-way through.
with open(out_file, "wb") as outfile:
    outfile.write(("Missing;" + end_date + "\n").encode())
    for volume_id in sorted(comics):
        volume_issues = comics[volume_id]
        ordered_ids = sorted(volume_issues)
        issues = ",".join(str(issue_id) for issue_id in ordered_ids)
        # NOTE(review): unlike the id list, every number - including the last -
        # is followed by a comma. This matches the previous output byte for
        # byte; confirm with the consumers of the file before trimming it.
        nums = "".join(volume_issues[issue_id] + "," for issue_id in ordered_ids)
        outfile.write((str(volume_id) + ";" + issues + ";" + nums + "\n").encode())
2022-10-05 16:31:47 +02:00
# Run summary: added / skipped / already-known issues, issues not returned by
# this round's query, updated issues, estimated ids of skipped records, and
# the number of duplicate issue ids seen while reading the input database.
print(
    "py: Done! {} comics added to database! ({} skipped and {} comics already in database)".format(
        new_comics_cont, comic_skip_cont, old_comics_cont
    )
)
print("py: {} comics in databased not retrieved in this round.".format(deleted_comics_cont))
print("py: {} comics updated in database.".format(updated_comics_cont))
# ErrorIds is built as ";id;id..."; drop the leading separator.
print("py: Ids with error in server: {}".format(ErrorIds[1:]))
print("py: {}".format(cont))
2022-07-15 09:21:05 +02:00
#raw_input("Press Enter to continue...")