ComicRack_Scripts/Update Missing/update_missing.py

#!/usr/bin/python2

"""
    Ver 2.0
    
    Overview:
    
    This script grabs the newest issues from ComicVine and 'appends' them to the
    missing.mcl file contents.
    
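    usage:
    python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>
    
        in_file:    missing.mcl (i.e., your most recent version)
        out_file:    missing_<date> (or whatever you want to name it)
        api_key:    provided by ComicVine
        start_date:    the date the mcl file was last synced (YYYY-MM-DD)
        end_date:    today's date (YYYY-MM-DD)
    
    All five arguments must be given together.  With fewer arguments the
    script ignores them and instead uses the "*_latest*" file and the .apikey
    file found in the "Update Missing" directory, plus today's date.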
    
    e.g.,
    python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17
    
    Technical stuff:
    
    The mcl file format contains a header followed by a list of volumes with 
    their respective issues/numbers.
    
        Missing;<date_of_last_update>
        <volume_id>;list of <issue_id>;list of <issue_num>
        <volume_id>;list of <issue_id>;list of <issue_num>
        ...
        <volume_id>;list of <issue_id>;list of <issue_num>
    
    The lists are comma delimited.  Commas followed immediately by a space are 
    not considered a delimiter.  Some issues are numbered like "v. 1, no. 01".
    If there is a space in the list of issue numbers, the entire list is 
    wrapped in double quotes.
    
    Note: There is one volume (id: 77901) that has an issue number "1,5".  This
    can potentially wreak some havoc if not treated carefully.    
"""

import requests
import sys
import os
import re
from datetime import date,datetime

ROOT_DIR = os.path.join(os.path.realpath(os.path.join(os.path.dirname(__file__), '..')), 'Update Missing/')

temp = ""
for i in os.listdir(ROOT_DIR):
    if (i.find("_latest") != -1):
            temp=i
old_date = re.search('[0-9]{8}',temp).group()

in_file = str(ROOT_DIR)+str(temp)
out_file = str(ROOT_DIR)+str(date.today().strftime("%Y%m%d")+"_latest.mcl")
data=[]
with open(ROOT_DIR+"/.apikey", "r") as f:
        data = f.readlines()

api_key = str(data[0].strip('\n'))
start_date = str(datetime.strptime(re.search('[0-9]{8}',temp).group(),"%Y%m%d").strftime("%Y-%m-%d"))
end_date = str(date.today().strftime("%Y-%m-%d"))
today_date = str(date.today().strftime("%Y%m%d"))

f1=open(ROOT_DIR+'log/'+today_date+'.log', 'a')

if len(sys.argv) > 5:
    print "Using argvs"
    in_file = str(sys.argv[1])        # missing.mcl
    out_file = str(sys.argv[2])     # updated_missing.mcl
    api_key = str(sys.argv[3])         # ComicVine API key
    start_date = str(sys.argv[4])     # start date range to search for new issues
    end_date = str(sys.argv[5])        # end date range to search for new issues

# issue_id -> issue number text, and issue_id -> volume_id, for every issue
# already present in the database snapshot.
issues_number = {}
issues_volume = {}
# Number of issue ids that occur more than once in the snapshot.
cont = 0


def _unquote(field):
    """Return field without the pair of double quotes that wraps a whole list."""
    if len(field) >= 2 and field[0] == '"' and field[-1] == '"':
        return field[1:-1]
    return field


with open(in_file, "r") as comiclist:
    print >>f1, "py: Reading in current database"

    # The first line is the "Missing;<date_of_last_update>" header.
    next(comiclist, None)

    for line in comiclist:
        text = unicode(line, encoding='utf-8').rstrip("\r\n")
        if not text:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue

        # Record layout: <volume_id>;<issue ids>;<issue numbers>
        line_split = text.split(";")
        volume_id = int(line_split[0])

        # A list may be wrapped in double quotes as a whole (see the format
        # notes in the module docstring); drop that wrapper before splitting.
        issue_split = _unquote(line_split[1]).split(",")
        num_split = _unquote(line_split[2]).split(",")

        if len(num_split) < len(issue_split):
            raise ValueError("volume %d: %d issue ids but only %d issue numbers"
                             % (volume_id, len(issue_split), len(num_split)))

        # Surplus number entries (such as the empty one produced by a trailing
        # comma) are ignored: only as many numbers as there are ids are read.
        for id_text, num_text in zip(issue_split, num_split):
            key = int(id_text)
            if key in issues_number:
                cont += 1
            issues_number[key] = num_text
            issues_volume[key] = volume_id

print >>f1, "py: Querying ComicVine for new issues"
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

# Counters reported in the final summary.
new_comics_cont = 0
old_comics_cont = 0
updated_comics_cont = 0
deleted_comics_cont = 0
comic_skip_cont = 0

# Paging state.
offset = 0              # index of the first record of the next request
total_results = 100     # placeholder until the first response reports the real total
limit = 100             # page size of the next request
skip = 0                # 1 while a page is being shrunk to step over a failing record
retry = 0               # consecutive failed attempts for the current page
FindingError = False    # True right after a record was skipped

# ";"-prefixed list of the ids reported as failing.
ErrorIds = ""
# Ids known from the snapshot that this run has not seen in any response yet.
non_retrieved_comics = issues_number.copy()
# Id of the most recently parsed issue; None until a response has been parsed.
issue_id = None

while offset < total_results:
    try:
        request_url = ("https://comicvine.gamespot.com/api/issues/?api_key=" + api_key
                       + "&limit=" + str(limit)
                       + "&format=json&offset=" + str(offset)
                       + "&field_list=id,issue_number,volume&filter=date_last_updated:"
                       + start_date + "|" + end_date + "&sort=id")

        # Without a timeout a stalled connection would block the script forever;
        # a timeout raises and is handled by the retry logic below.
        r = requests.get(request_url, headers=headers, timeout=60)
        json_obj = r.json()

        total_results = json_obj['number_of_total_results']

        print >>f1, "py:" + str(min(offset,total_results)) + "/" + str(total_results) + " Since " + start_date

        for i in json_obj['results']:
            volume_id = i['volume']['id']
            issue_id = i['id']
            # "," and ";" are the list/field delimiters of the mcl format, so
            # they are stored as the tokens ".&@1" / ".&@2"; line breaks are dropped.
            num = unicode(i['issue_number']).replace(",",".&@1").replace(";",".&@2").replace("\n","").replace("\r","")

            if issue_id not in issues_number:
                new_comics_cont += 1
                issues_number[issue_id] = num
                issues_volume[issue_id] = volume_id
            else:
                # pop() instead of del: the same id can be seen again (for
                # instance when a partially processed page is retried), and a
                # KeyError here would be mistaken for a server error.
                non_retrieved_comics.pop(issue_id, None)
                old_comics_cont += 1
                if issues_number[issue_id] != num or issues_volume[issue_id] != volume_id:
                    updated_comics_cont += 1
                    issues_number[issue_id] = num
                    issues_volume[issue_id] = volume_id

        # Advance past this page, plus the failing record when one was isolated.
        offset += limit + skip

        FindingError = False

        if skip == 1:
            # NOTE(review): the reported id is the last parsed id + 1, which
            # looks like an estimate of the failing record - confirm.
            print >>f1, "py: Comic with error found, id= " + str(issue_id+1)
            ErrorIds += ";"+ str(issue_id+1)
            comic_skip_cont += 1
            print >>f1, "py: Continue loading comics now..."
            FindingError = True

        # Back to full-size pages for the next request.
        skip = 0
        limit = 100
        retry = 0

    except Exception:
        # Exception rather than a bare except, so that Ctrl-C and sys.exit()
        # still terminate the script.
        if retry < 4 and not FindingError:
            # Up to four plain retries of the same request.
            print >>f1, "py: Error. Trying Again..."
            retry += 1
        else:

            if not FindingError:
                # Shrink the page by one record per failure until it succeeds;
                # the record just after the shortened page is then skipped.
                print >>f1, "py: Finding Error in comic list: " + str(100-limit) + "%"
                skip = 1
                limit -= 1

            if limit == 0 or FindingError:
                if issue_id is None:
                    # No response was ever parsed, so there is no id to report.
                    # Surface the real failure instead of a NameError.
                    raise
                # Probe record by record, stepping over one record per failure.
                # NOTE(review): the id is logged before offset is incremented
                # and recorded after it, so the two values differ by one.
                print >>f1, "py: Comic with error found, id= " + str(issue_id+offset)
                FindingError = True
                limit = 1
                offset += 1
                comic_skip_cont += 1
                ErrorIds += ";"+ str(issue_id+offset)

# Regroup the two flat per-issue maps into {volume_id: {issue_id: issue number}}.
comics = {}
for issue_id, number in issues_number.items():
    volume = issues_volume[issue_id]
    comics.setdefault(volume, {})[issue_id] = number

print >>f1, "py: Writing missings to file"

# One id per line: issues that were in the snapshot but were not returned by
# this run's query.  The context manager closes the file even if a write
# fails, and sorting keeps the output stable between runs.
with open(ROOT_DIR+"Deleted_Comics.txt", "wb") as deleted_file:
    for issue_id in sorted(non_retrieved_comics):
        deleted_file.write(str(issue_id)+"\n")
        deleted_comics_cont += 1
    
print >>f1, "py: Writing database to file"

# The context manager closes the output file even if a write fails.
with open(out_file, "wb") as outfile:
    # Header line, then one "<volume_id>;<issue ids>;<issue numbers>" record
    # per volume, with volumes and issues in ascending id order.
    outfile.write("Missing;" + end_date + "\n")

    for volume_id in sorted(comics):
        volume_issues = comics[volume_id]
        ordered_ids = sorted(volume_issues)

        issues = ",".join(str(issue_id) for issue_id in ordered_ids)
        # NOTE(review): unlike the id list, the number list is written with a
        # trailing comma.  That matches what this script has always produced,
        # so it is kept; confirm whether readers of the file rely on it.
        nums = ",".join(volume_issues[issue_id] for issue_id in ordered_ids) + ","

        outfile.write(str(volume_id) + ";" + issues + ";" + nums.encode('utf-8','ignore') + "\n")

# Run summary: added / skipped / already-known counts, ids from the snapshot
# that were not returned, updated entries and the ids reported as failing.
print >>f1, "py: Done! " + str(new_comics_cont) + " comics added to database! (" + str(comic_skip_cont)+ " skipped and " + str(old_comics_cont) + " comics already in database)" 
print >>f1, "py: " + str(deleted_comics_cont) + " comics in databased not retrieved in this round."
print >>f1, "py: " + str(updated_comics_cont) + " comics updated in database."
print >>f1, "py: Ids with error in server: " + ErrorIds[1:]
# Number of issue ids that appeared more than once in the input database.
print >>f1, "py: " + str(cont)
# Nothing is logged after this point, so flush and release the log file.
f1.close()
#raw_input("Press Enter to continue...")
updated bash and python script 2022-07-15 09:21:05 +02:00			`#!/usr/bin/python2`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
			`"""`
			`Ver 2.0`

			`Overview:`

			`This script grabs the newest issues from ComicVine and 'appends' them to the`
			`missing.mcl file contents.`

			`usage:`
			`python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>`

			`in_file: missing.mcl (i.e., your most recent version)`
			`out_file: missing_<date> (or w/e you want to name it)`
			`api_key: provided by ComicVine`
			`start_date: the date the mcl file is synched with (YYYY-MM-DD)`
			`end_date: today's date (YYYY-MM-DD)`

			`e.g.,`
			`python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17`

			`Technical stuff:`

			`The mcl file format contains a header followed by a list of volumes with`
			`their respective issues/numbers.`

			`Missing;<date_of_last_update>`
			`<volume_id>;list of <issue_id>;list of <issue_num>`
			`<volume_id>;list of <issue_id>;list of <issue_num>`
			`...`
			`<volume_id>;list of <issue_id>;list of <issue_num>`

			`The lists are comma delimited. Commas followed immediately by a space are`
			`not considered a delimiter. Some issues are numbered like "v. 1, no. 01".`
			`If there is a space in the list of issue numbers, the entire list is`
			`wrapped in double quotes.`

			`Note: There is one volume (id: 77901) that has an issue number "1,5". This`
			`can potentially wreak some havoc if not treated carefully.`
			`"""`

			`import requests`
			`import sys`
updated bash and python script 2022-07-15 09:21:05 +02:00			`import os`
			`import re`
			`from datetime import date,datetime`

updated bash and python script 2022-07-15 10:43:24 +02:00			`ROOT_DIR = os.path.join(os.path.realpath(os.path.join(os.path.dirname(__file__), '..')), 'Update Missing/')`

updated bash and python script 2022-07-15 09:21:05 +02:00			`temp = ""`
updated bash and python script 2022-07-15 10:43:24 +02:00			`for i in os.listdir(ROOT_DIR):`
updated bash and python script 2022-07-15 09:21:05 +02:00			`if (i.find("_latest") != -1):`
			`temp=i`
			`old_date = re.search('[0-9]{8}',temp).group()`

updated bash and python script 2022-07-15 10:43:24 +02:00			`in_file = str(ROOT_DIR)+str(temp)`
			`out_file = str(ROOT_DIR)+str(date.today().strftime("%Y%m%d")+"_latest.mcl")`
updated bash and python script 2022-07-15 09:21:05 +02:00			`data=[]`
updated bash and python script 2022-07-15 10:43:24 +02:00			`with open(ROOT_DIR+"/.apikey", "r") as f:`
updated bash and python script 2022-07-15 09:21:05 +02:00			`data = f.readlines()`

			`api_key = str(data[0].strip('\n'))`
			`start_date = str(datetime.strptime(re.search('[0-9]{8}',temp).group(),"%Y%m%d").strftime("%Y-%m-%d"))`
			`end_date = str(date.today().strftime("%Y-%m-%d"))`
updated gitignore 2022-07-15 09:53:26 +02:00			`today_date = str(date.today().strftime("%Y%m%d"))`
updated bash and python script 2022-07-15 09:21:05 +02:00
updated bash and python script 2022-07-15 10:43:24 +02:00			`f1=open(ROOT_DIR+'log/'+today_date+'.log', 'a')`
updated bash and python script 2022-07-15 09:21:05 +02:00
			`if len(sys.argv) > 5:`
			`print "Using argvs"`
			`in_file = str(sys.argv[1]) # missing.mcl`
			`out_file = str(sys.argv[2]) # updated_missing.mcl`
			`api_key = str(sys.argv[3]) # ComicVine API key`
			`start_date = str(sys.argv[4]) # start date range to search for new issues`
			`end_date = str(sys.argv[5]) # end date range to search for new issues`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
			`comiclist = open(in_file, "r")`
			`issues_number = {}`
			`issues_volume = {}`
			`skip_header = True`
			`cont = 0`
updated bash and python script 2022-07-15 10:43:24 +02:00			`exit`
updated gitignore 2022-07-15 09:53:26 +02:00			`print >>f1, "py: Reading in current database"`

Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`for line in comiclist:`
			`if skip_header:`
			`skip_header = False`
			`continue`

			`line_split = unicode(line, encoding='utf-8').replace("\n","").split(";")`
			`volume_id = int(line_split[0])`

			`if (line_split[1][0] == '"') and (line_split[1][len(line_split[1])] == '"'):`
			`line_split = line_split[1:-1]`

			`issue_split = line_split[1].split(",")`
			`num_split = line_split[2].split(",")`

			`for i in range(0,len(issue_split)):`
			`if issues_number.has_key(int(issue_split[i])):`
			`cont += 1`
			`issues_number[int(issue_split[i])] = num_split[i]`
			`issues_volume[int(issue_split[i])] = volume_id`

			`comiclist.close()`

updated gitignore 2022-07-15 09:53:26 +02:00			`print >>f1, "py: Querying ComicVine for new issues"`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}`
			`new_comics_cont = 0`
			`old_comics_cont = 0`
			`updated_comics_cont = 0`
			`deleted_comics_cont = 0`
			`comic_skip_cont = 0`
			`offset = 0`
			`max = 100`
			`limit = 100`
			`skip = 0`
			`retry = 0`
			`ErrorIds = ""`
			`non_retrieved_comics = issues_number.copy()`
			`FindingError = False`

			`while offset < max:`
			`try:`
			`request_url = "https://comicvine.gamespot.com/api/issues/?api_key=" + api_key +"&limit=" + str(limit) + "&format=json&offset=" + str(offset) + "&field_list=id,issue_number,volume&filter=date_last_updated:" + start_date + "\|" + end_date + "&sort=id"`

updated gitignore 2022-07-15 09:53:26 +02:00			`#print request_url`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`r = requests.get(request_url, headers=headers)`
			`json_obj = r.json()`

			`max = json_obj['number_of_total_results']`

updated gitignore 2022-07-15 09:53:26 +02:00			`print >>f1, "py:" + str(min(offset,max)) + "/" + str(max) + " Since " + start_date`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
			`for i in json_obj['results']:`
			`volume_id = i['volume']['id']`
			`issue_id = i['id']`
			`num = unicode(i['issue_number']).replace(",",".&@1").replace(";",".&@2").replace("\n","").replace("\r","")`

			`if not issues_number.has_key(issue_id):`
			`new_comics_cont += 1`
			`issues_number[issue_id] = num`
			`issues_volume[issue_id] = volume_id`
			`else:`
			`del non_retrieved_comics[issue_id]`
			`old_comics_cont += 1`
			`if issues_number[issue_id] != num or issues_volume[issue_id] != volume_id:`
			`updated_comics_cont += 1`
			`issues_number[issue_id] = num`
			`issues_volume[issue_id] = volume_id`

			`offset += limit + skip`

			`FindingError = False`

			`if skip == 1:`
updated gitignore 2022-07-15 09:53:26 +02:00			`print >>f1, "py: Comic with error found, id= " + str(issue_id+1)`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`ErrorIds += ";"+ str(issue_id+1)`
			`comic_skip_cont += 1`
updated gitignore 2022-07-15 09:53:26 +02:00			`print >>f1, "py: Continue loading comics now..."`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`FindingError = True`

			`skip = 0`
			`limit = 100`
			`retry = 0`

			`except:`
			`if retry < 4 and not FindingError:`
updated gitignore 2022-07-15 09:53:26 +02:00			`print >>f1, "py: Error. Trying Again..."`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`retry += 1`
			`else:`

			`if not FindingError:`

updated gitignore 2022-07-15 09:53:26 +02:00			`print >>f1, "py: Finding Error in comic list: " + str(100-limit) + "%"`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`skip = 1`
			`limit -= 1`

			`if limit == 0 or FindingError:`
updated gitignore 2022-07-15 09:53:26 +02:00			`print >>f1, "py: Comic with error found, id= " + str(issue_id+offset)`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`FindingError = True`
			`limit = 1`
			`offset += 1`
			`comic_skip_cont += 1`
			`ErrorIds += ";"+ str(issue_id+offset)`

			`comics = {}`
			`for issue_id in issues_number.keys():`
			`if not comics.has_key(issues_volume[issue_id]):`
			`comics[issues_volume[issue_id]] = {}`
			`comics[issues_volume[issue_id]][issue_id]=issues_number[issue_id]`

updated gitignore 2022-07-15 09:53:26 +02:00			`print >>f1, "py: Writing missings to file"`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
updated bash and python script 2022-07-15 10:43:24 +02:00			`deleted_file = open(ROOT_DIR+"Deleted_Comics.txt", "wb")`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
			`for issue_id in non_retrieved_comics.keys():`
			`deleted_file.write(str(issue_id)+"\n")`
			`deleted_comics_cont += 1`

			`deleted_file.close()`

updated gitignore 2022-07-15 09:53:26 +02:00			`print >>f1, "py: Writing database to file"`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
			`outfile = open(out_file,"wb")`
			`outfile.write("Missing;" + end_date + "\n")`

			`for volume_id in sorted(comics.iterkeys()):`
			`issues = ""`
			`nums = ""`
			`for issue_id in sorted(comics[volume_id].iterkeys()):`
			`issues += str(issue_id) + ","`
			`nums += comics[volume_id][issue_id] + ","`
			`issues = issues[:-1]`
			`outfile.write(str(volume_id) + ";" + issues + ";" + nums.encode('utf-8','ignore') + "\n")`

			`outfile.close()`

updated gitignore 2022-07-15 09:53:26 +02:00			`print >>f1, "py: Done! " + str(new_comics_cont) + " comics added to database! (" + str(comic_skip_cont)+ " skipped and " + str(old_comics_cont) + " comics already in database)"`
			`print >>f1, "py: " + str(deleted_comics_cont) + " comics in databased not retrieved in this round."`
			`print >>f1, "py: " + str(updated_comics_cont) + " comics updated in database."`
			`print >>f1, "py: Ids with error in server: " + ErrorIds[1:]`
			`print >>f1, "py: " + str(cont)`
updated bash and python script 2022-07-15 09:21:05 +02:00			`#raw_input("Press Enter to continue...")`