ComicRack_Scripts/Update Missing/update_missing.py

#!/usr/bin/python3

"""
    Ver 2.0
    
    Overview:
    
    This script grabs the newest issues from ComicVine and 'appends' them to the
    missing.mcl file contents.
    
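    usage:
    python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>
    
        in_file:    missing.mcl (i.e., your most recent version)
        out_file:    missing_<date> (or whatever you want to name it)
        api_key:    provided by ComicVine
        start_date:    the date the mcl file was last synced (YYYY-MM-DD)
        end_date:    today's date (YYYY-MM-DD)
    
    If fewer than five arguments are given, defaults are used instead: the
    input is the file in the "Update Missing/" directory whose name contains
    "_latest" (the 8-digit date in its name becomes start_date), the output is
    <YYYYMMDD>_latest.mcl for today's date, the API key is the first line of
    the ".apikey" file in that directory, and end_date is today.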
    
    e.g.,
    python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17
    
    Technical stuff:
    
    The mcl file format contains a header followed by a list of volumes with 
    their respective issues/numbers.
    
        Missing;<date_of_last_update>
        <volume_id>;list of <issue_id>;list of <issue_num>
        <volume_id>;list of <issue_id>;list of <issue_num>
        ...
        <volume_id>;list of <issue_id>;list of <issue_num>
    
    The lists are comma delimited.  Commas followed immediately by a space are 
    not considered a delimiter.  Some issues are numbered like "v. 1, no. 01".
    If there is a space in the list of issue numbers, the entire list is 
    wrapped in double quotes.
    
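    Note: There is one volume (id: 77901) that has an issue number "1,5".  This
    can potentially wreak some havoc if not treated carefully.  Issue numbers
    fetched from ComicVine are stored with "," replaced by ".&@1" and ";"
    replaced by ".&@2".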
"""

import requests
import sys
import os
import io
import re
from datetime import date,datetime

ROOT_DIR = os.path.join(os.path.realpath(os.path.join(os.path.dirname(__file__), '..')), 'Update Missing/')

API_URL = "https://comicvine.gamespot.com/api/issues/"
HEADERS = {'User-Agent': 'Update Missing/ (https://gitea.baerentsen.space/FrederikBaerentsen/ComicRack_Scripts/src/branch/master/Update Missing)'}
PAGE_SIZE = 100      # issues requested per page
MAX_RETRIES = 4      # plain retries before the failing issue is hunted down
DATE_STAMP = re.compile(r'[0-9]{8}')


def find_latest_database(file_names):
    """Return the newest name containing "_latest" and an 8-digit date.

    Candidates are ranked by their date stamp (then by name), so the result
    does not depend on directory listing order.  Returns None when no name
    qualifies.
    """
    dated = []
    for name in file_names:
        stamp = DATE_STAMP.search(name)
        if "_latest" in name and stamp:
            dated.append((stamp.group(), name))
    if not dated:
        return None
    return max(dated)[1]


def default_settings(root_dir):
    """Derive (in_file, out_file, api_key, start_date, end_date) from root_dir.

    Raises SystemExit with a readable message when no dated "_latest"
    database exists or the ".apikey" file has an empty first line.
    """
    latest = find_latest_database(os.listdir(root_dir))
    if latest is None:
        raise SystemExit("py: No '_latest' database with an 8-digit date found in " + root_dir)

    with open(os.path.join(root_dir, ".apikey"), "r") as key_file:
        api_key = key_file.readline().strip()
    if not api_key:
        raise SystemExit("py: The .apikey file in " + root_dir + " is empty")

    today = date.today()
    stamp = today.strftime("%Y%m%d")
    start_date = datetime.strptime(DATE_STAMP.search(latest).group(), "%Y%m%d").strftime("%Y-%m-%d")

    # The original script opened today's log in append mode and never wrote to
    # it; keep the "file exists afterwards" side effect but release the handle.
    with open(os.path.join(root_dir, 'log', stamp + '.log'), 'a'):
        pass

    return (
        os.path.join(root_dir, latest),
        os.path.join(root_dir, stamp + "_latest.mcl"),
        api_key,
        start_date,
        today.strftime("%Y-%m-%d"),
    )


def resolve_settings(argv, root_dir):
    """Return (in_file, out_file, api_key, start_date, end_date).

    Five or more command-line arguments override everything; the defaults in
    root_dir are only consulted (and only need to exist) otherwise.
    """
    if len(argv) > 5:
        print("Using argvs")
        return tuple(str(arg) for arg in argv[1:6])
    return default_settings(root_dir)


def parse_database_line(line):
    """Parse one "<volume_id>;<issue ids>;<issue numbers>" database line.

    Returns (volume_id, [(issue_id, issue_num), ...]), or None for a blank
    line.  Raises ValueError for a line with fewer than three fields or with
    fewer numbers than ids.
    """
    text = line.rstrip("\r\n")
    if not text.strip():
        return None

    fields = text.split(";")
    if len(fields) < 3:
        raise ValueError("malformed database line: " + repr(line))

    volume_id = int(fields[0])

    # Unwrap an id list enclosed in double quotes.  The previous check indexed
    # one past the end of the string and therefore always raised IndexError.
    # NOTE(review): the module docstring describes quoting for the issue
    # *number* list; that field is still taken verbatim here - confirm whether
    # quoted number lists ever occur in practice.
    ids_field = fields[1]
    if len(ids_field) >= 2 and ids_field[0] == '"' and ids_field[-1] == '"':
        ids_field = ids_field[1:-1]

    issue_ids = [int(part) for part in ids_field.split(",")]
    nums = fields[2].split(",")
    if len(nums) < len(issue_ids):
        raise ValueError("fewer issue numbers than issue ids: " + repr(line))

    # zip() drops the empty tail produced by the trailing comma the writer
    # leaves after the number list.
    return volume_id, list(zip(issue_ids, nums))


def load_database(lines):
    """Build the issue tables from database lines, skipping the header line.

    Returns (issues_number, issues_volume, duplicates): issue id -> number,
    issue id -> volume id, and how many times an issue id was seen again.
    """
    issues_number = {}
    issues_volume = {}
    duplicates = 0

    rows = iter(lines)
    next(rows, None)  # "Missing;<date>" header
    for line in rows:
        parsed = parse_database_line(line)
        if parsed is None:
            continue
        volume_id, issues = parsed
        for issue_id, num in issues:
            if issue_id in issues_number:
                duplicates += 1
            issues_number[issue_id] = num
            issues_volume[issue_id] = volume_id

    return issues_number, issues_volume, duplicates


def read_database(path):
    """Read the database at path; see load_database() for the result."""
    # Decode as UTF-8 because write_database() encodes as UTF-8; relying on
    # the locale default would garble non-ASCII issue numbers on some systems.
    with io.open(path, "r", encoding="utf8") as database:
        return load_database(database)


def clean_issue_number(raw):
    """Escape the file delimiters in an issue number and drop line breaks."""
    return raw.replace(",", ".&@1").replace(";", ".&@2").replace("\n", "").replace("\r", "")


def build_request_url(api_key, limit, offset, start_date, end_date):
    """Return the issues query URL for one page of recently updated issues."""
    return (API_URL + "?api_key=" + api_key + "&limit=" + str(limit)
            + "&format=json&offset=" + str(offset)
            + "&field_list=id,issue_number,volume&filter=date_last_updated:"
            + start_date + "|" + end_date + "&sort=id")


def apply_page(page, issues_number, issues_volume, non_retrieved):
    """Merge one parsed page of (volume_id, issue_id, num) into the tables.

    Mutates all three dicts and returns (new, old, updated) counts.  Safe to
    call with issues that were already merged: removal from non_retrieved
    tolerates a missing key.
    """
    new = old = updated = 0
    for volume_id, issue_id, num in page:
        if issue_id in issues_number:
            non_retrieved.pop(issue_id, None)
            old += 1
            if issues_number[issue_id] != num or issues_volume[issue_id] != volume_id:
                updated += 1
        else:
            new += 1
        issues_number[issue_id] = num
        issues_volume[issue_id] = volume_id
    return new, old, updated


def query_updates(api_key, start_date, end_date, issues_number, issues_volume):
    """Page through the issues updated between the two dates and merge them.

    Mutates issues_number / issues_volume.  Returns (counts, error_ids,
    non_retrieved): a dict with "new", "old", "updated" and "skipped" counts,
    the guessed ids of issues that had to be skipped, and the issues from the
    existing database that no response mentioned.

    Failure handling: a failing page is retried MAX_RETRIES times; after that
    the page is shrunk one issue at a time until it loads, and the issue just
    past it is skipped.  If nothing has ever been parsed when an issue would
    have to be skipped, the underlying error is re-raised so that no output is
    written.
    """
    non_retrieved = issues_number.copy()
    counts = {"new": 0, "old": 0, "updated": 0, "skipped": 0}
    error_ids = []
    offset = 0
    total = PAGE_SIZE       # placeholder until the first response arrives
    limit = PAGE_SIZE
    skip = 0
    retry = 0
    finding_error = False
    last_issue_id = None    # id of the most recent issue read from a response

    while offset < total:
        try:
            url = build_request_url(api_key, limit, offset, start_date, end_date)
            payload = requests.get(url, headers=HEADERS).json()

            # NOTE(review): if the API answers with an error payload whose
            # total is 0, the loop simply ends - consider validating the
            # payload's status before trusting the total.
            total = payload['number_of_total_results']

            print("py:" + str(min(offset, total)) + "/" + str(total) + " Since " + start_date)

            # Parse the whole page before touching the tables, so a bad entry
            # leaves no half-applied page behind for the retry to trip over.
            page = []
            for item in payload['results']:
                volume_id = item['volume']['id']
                last_issue_id = item['id']
                page.append((volume_id, last_issue_id, clean_issue_number(item['issue_number'])))
        except Exception:
            # "Exception" rather than a bare except, so Ctrl-C still stops
            # the script.
            if retry < MAX_RETRIES and not finding_error:
                print("py: Error. Trying Again...")
                retry += 1
                continue

            if not finding_error:
                print("py: Finding Error in comic list: " + str(PAGE_SIZE - limit) + "%")
                skip = 1
                limit -= 1

            if limit == 0 or finding_error:
                if last_issue_id is None:
                    # Nothing was ever parsed: abort with the real error.
                    raise
                # The reported ids are heuristic guesses, kept as before.
                print("py: Comic with error found, id= " + str(last_issue_id + offset))
                finding_error = True
                limit = 1
                offset += 1
                # The bad issue was just stepped over via offset; a pending
                # skip would drop one more (good) issue after the next page.
                skip = 0
                counts["skipped"] += 1
                error_ids.append(str(last_issue_id + offset))
            continue

        new, old, updated = apply_page(page, issues_number, issues_volume, non_retrieved)
        counts["new"] += new
        counts["old"] += old
        counts["updated"] += updated

        offset += limit + skip
        finding_error = False

        if skip == 1:
            guess = "unknown" if last_issue_id is None else str(last_issue_id + 1)
            print("py: Comic with error found, id= " + guess)
            error_ids.append(guess)
            counts["skipped"] += 1
            print("py: Continue loading comics now...")
            finding_error = True

        skip = 0
        limit = PAGE_SIZE
        retry = 0

    return counts, error_ids, non_retrieved


def group_by_volume(issues_number, issues_volume):
    """Return {volume_id: {issue_id: issue_num}} built from the two tables."""
    comics = {}
    for issue_id, num in issues_number.items():
        comics.setdefault(issues_volume[issue_id], {})[issue_id] = num
    return comics


def format_volume_line(volume_id, issues):
    """Return the database line for one volume, issues ordered by id."""
    ordered = sorted(issues)
    ids = ",".join(str(issue_id) for issue_id in ordered)
    # NOTE(review): the number list keeps a trailing comma (the id list does
    # not).  That is the format this script has always written, so it is
    # preserved here - confirm before changing it.
    nums = "".join(issues[issue_id] + "," for issue_id in ordered)
    return str(volume_id) + ";" + ids + ";" + nums + "\n"


def write_database(path, end_date, comics):
    """Write the header and one line per volume (sorted by id) as UTF-8."""
    with io.open(path, "w", encoding="utf8") as outfile:
        outfile.write("Missing;" + end_date + "\n")
        for volume_id in sorted(comics):
            outfile.write(format_volume_line(volume_id, comics[volume_id]))


def write_deleted(path, issue_ids):
    """Write one issue id per line to path and return how many were written."""
    written = 0
    with open(path, "wb") as deleted_file:
        for issue_id in issue_ids:
            deleted_file.write((str(issue_id) + "\n").encode())
            written += 1
    return written


def main(argv=None):
    """Update the missing-issues database; argv defaults to sys.argv."""
    in_file, out_file, api_key, start_date, end_date = resolve_settings(
        sys.argv if argv is None else argv, ROOT_DIR)

    print("py: Reading in current database")
    issues_number, issues_volume, duplicates = read_database(in_file)

    print("py: Querying ComicVine for new issues")
    counts, error_ids, non_retrieved = query_updates(
        api_key, start_date, end_date, issues_number, issues_volume)

    comics = group_by_volume(issues_number, issues_volume)

    print("py: Writing missings to file")
    deleted = write_deleted(os.path.join(ROOT_DIR, "Deleted_Comics.txt"), non_retrieved)

    print("py: Writing database to file")
    write_database(out_file, end_date, comics)

    print("py: Done! " + str(counts["new"]) + " comics added to database! (" + str(counts["skipped"])+ " skipped and " + str(counts["old"]) + " comics already in database)")
    print("py: " + str(deleted) + " comics in databased not retrieved in this round.")
    print("py: " + str(counts["updated"]) + " comics updated in database.")
    print("py: Ids with error in server: " + ";".join(error_ids))
    print("py: " + str(duplicates))


if __name__ == "__main__":
    main()
#raw_input("Press Enter to continue...")
updated bash and python script 2022-07-15 09:21:05 +02:00			`#!/usr/bin/python2`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
			`"""`
			`Ver 2.0`

			`Overview:`

			`This script grabs the newest issues from ComicVine and 'appends' them to the`
			`missing.mcl file contents.`

			`usage:`
			`python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>`

			`in_file: missing.mcl (i.e., your most recent version)`
			`out_file: missing_<date> (or w/e you want to name it)`
			`api_key: provided by ComicVine`
			`start_date: the date the mcl file is synched with (YYYY-MM-DD)`
			`end_date: today's date (YYYY-MM-DD)`

			`e.g.,`
			`python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17`

			`Technical stuff:`

			`The mcl file format contains a header followed by a list of volumes with`
			`their respective issues/numbers.`

			`Missing;<date_of_last_update>`
			`<volume_id>;list of <issue_id>;list of <issue_num>`
			`<volume_id>;list of <issue_id>;list of <issue_num>`
			`...`
			`<volume_id>;list of <issue_id>;list of <issue_num>`

			`The lists are comma delimited. Commas followed immediately by a space are`
			`not considered a delimiter. Some issues are numbered like "v. 1, no. 01".`
			`If there is a space in the list of issue numbers, the entire list is`
			`wrapped in double quotes.`

			`Note: There is one volume (id: 77901) that has an issue number "1,5". This`
			`can potentially wreak some havoc if not treated carefully.`
			`"""`

			`import requests`
			`import sys`
updated bash and python script 2022-07-15 09:21:05 +02:00			`import os`
Updated user agent 2024-11-12 16:49:10 +01:00			`import io`
updated bash and python script 2022-07-15 09:21:05 +02:00			`import re`
			`from datetime import date,datetime`

updated bash and python script 2022-07-15 10:43:24 +02:00			`ROOT_DIR = os.path.join(os.path.realpath(os.path.join(os.path.dirname(__file__), '..')), 'Update Missing/')`

updated bash and python script 2022-07-15 09:21:05 +02:00			`temp = ""`
updated bash and python script 2022-07-15 10:43:24 +02:00			`for i in os.listdir(ROOT_DIR):`
updated bash and python script 2022-07-15 09:21:05 +02:00			`if (i.find("_latest") != -1):`
			`temp=i`
			`old_date = re.search('[0-9]{8}',temp).group()`

updated bash and python script 2022-07-15 10:43:24 +02:00			`in_file = str(ROOT_DIR)+str(temp)`
			`out_file = str(ROOT_DIR)+str(date.today().strftime("%Y%m%d")+"_latest.mcl")`
updated bash and python script 2022-07-15 09:21:05 +02:00			`data=[]`
updated bash and python script 2022-07-15 10:43:24 +02:00			`with open(ROOT_DIR+"/.apikey", "r") as f:`
updated bash and python script 2022-07-15 09:21:05 +02:00			`data = f.readlines()`

			`api_key = str(data[0].strip('\n'))`
			`start_date = str(datetime.strptime(re.search('[0-9]{8}',temp).group(),"%Y%m%d").strftime("%Y-%m-%d"))`
			`end_date = str(date.today().strftime("%Y-%m-%d"))`
updated gitignore 2022-07-15 09:53:26 +02:00			`today_date = str(date.today().strftime("%Y%m%d"))`
updated bash and python script 2022-07-15 09:21:05 +02:00
updated bash and python script 2022-07-15 10:43:24 +02:00			`f1=open(ROOT_DIR+'log/'+today_date+'.log', 'a')`
updated bash and python script 2022-07-15 09:21:05 +02:00
			`if len(sys.argv) > 5:`
updated to python3 2022-10-05 16:31:47 +02:00			`print("Using argvs")`
updated bash and python script 2022-07-15 09:21:05 +02:00			`in_file = str(sys.argv[1]) # missing.mcl`
			`out_file = str(sys.argv[2]) # updated_missing.mcl`
			`api_key = str(sys.argv[3]) # ComicVine API key`
			`start_date = str(sys.argv[4]) # start date range to search for new issues`
			`end_date = str(sys.argv[5]) # end date range to search for new issues`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
			`comiclist = open(in_file, "r")`
			`issues_number = {}`
			`issues_volume = {}`
			`skip_header = True`
			`cont = 0`
updated bash and python script 2022-07-15 10:43:24 +02:00			`exit`
updated to python3 2022-10-05 16:31:47 +02:00			`print("py: Reading in current database")`
updated gitignore 2022-07-15 09:53:26 +02:00
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`for line in comiclist:`
			`if skip_header:`
			`skip_header = False`
			`continue`

updated to python3 2022-10-05 16:31:47 +02:00			`line_split = line.replace("\n","").split(";")`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`volume_id = int(line_split[0])`

			`if (line_split[1][0] == '"') and (line_split[1][len(line_split[1])] == '"'):`
			`line_split = line_split[1:-1]`

			`issue_split = line_split[1].split(",")`
			`num_split = line_split[2].split(",")`

			`for i in range(0,len(issue_split)):`
updated to python3 2022-10-05 16:31:47 +02:00			`#if issues_number.has_key(int(issue_split[i])):`
			`#if int(issue_split[i]) in issue_number:`
			`if issues_number.__contains__(int(issue_split[i])):`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`cont += 1`
			`issues_number[int(issue_split[i])] = num_split[i]`
			`issues_volume[int(issue_split[i])] = volume_id`

			`comiclist.close()`

updated to python3 2022-10-05 16:31:47 +02:00			`print("py: Querying ComicVine for new issues")`
Updated user agent 2024-11-12 16:50:03 +01:00			`headers = {'User-Agent': 'Update Missing/ (https://gitea.baerentsen.space/FrederikBaerentsen/ComicRack_Scripts/src/branch/master/Update Missing)'}`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`new_comics_cont = 0`
			`old_comics_cont = 0`
			`updated_comics_cont = 0`
			`deleted_comics_cont = 0`
			`comic_skip_cont = 0`
			`offset = 0`
			`max = 100`
			`limit = 100`
			`skip = 0`
			`retry = 0`
			`ErrorIds = ""`
			`non_retrieved_comics = issues_number.copy()`
			`FindingError = False`

			`while offset < max:`
			`try:`
			`request_url = "https://comicvine.gamespot.com/api/issues/?api_key=" + api_key +"&limit=" + str(limit) + "&format=json&offset=" + str(offset) + "&field_list=id,issue_number,volume&filter=date_last_updated:" + start_date + "\|" + end_date + "&sort=id"`

updated gitignore 2022-07-15 09:53:26 +02:00			`#print request_url`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`r = requests.get(request_url, headers=headers)`
			`json_obj = r.json()`

			`max = json_obj['number_of_total_results']`

updated to python3 2022-10-05 16:31:47 +02:00			`print("py:" + str(min(offset,max)) + "/" + str(max) + " Since " + start_date)`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
			`for i in json_obj['results']:`
			`volume_id = i['volume']['id']`
			`issue_id = i['id']`
updated to python3 2022-10-05 16:31:47 +02:00			`num = i['issue_number'].replace(",",".&@1").replace(";",".&@2").replace("\n","").replace("\r","")`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
updated to python3 2022-10-05 16:31:47 +02:00			`if not issues_number.__contains__(issue_id):`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`new_comics_cont += 1`
			`issues_number[issue_id] = num`
			`issues_volume[issue_id] = volume_id`
			`else:`
			`del non_retrieved_comics[issue_id]`
			`old_comics_cont += 1`
			`if issues_number[issue_id] != num or issues_volume[issue_id] != volume_id:`
			`updated_comics_cont += 1`
			`issues_number[issue_id] = num`
			`issues_volume[issue_id] = volume_id`

			`offset += limit + skip`

			`FindingError = False`

			`if skip == 1:`
updated to python3 2022-10-05 16:31:47 +02:00			`print("py: Comic with error found, id= " + str(issue_id+1))`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`ErrorIds += ";"+ str(issue_id+1)`
			`comic_skip_cont += 1`
updated to python3 2022-10-05 16:31:47 +02:00			`print("py: Continue loading comics now...")`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`FindingError = True`

			`skip = 0`
			`limit = 100`
			`retry = 0`

			`except:`
			`if retry < 4 and not FindingError:`
updated to python3 2022-10-05 16:31:47 +02:00			`print("py: Error. Trying Again...")`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`retry += 1`
			`else:`

			`if not FindingError:`

updated to python3 2022-10-05 16:31:47 +02:00			`print("py: Finding Error in comic list: " + str(100-limit) + "%")`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`skip = 1`
			`limit -= 1`

			`if limit == 0 or FindingError:`
updated to python3 2022-10-05 16:31:47 +02:00			`print("py: Comic with error found, id= " + str(issue_id+offset))`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`FindingError = True`
			`limit = 1`
			`offset += 1`
			`comic_skip_cont += 1`
			`ErrorIds += ";"+ str(issue_id+offset)`

			`comics = {}`
			`for issue_id in issues_number.keys():`
updated to python3 2022-10-05 16:31:47 +02:00			`if not comics.__contains__(issues_volume[issue_id]):`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`comics[issues_volume[issue_id]] = {}`
			`comics[issues_volume[issue_id]][issue_id]=issues_number[issue_id]`

updated to python3 2022-10-05 16:31:47 +02:00			`print("py: Writing missings to file")`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
updated bash and python script 2022-07-15 10:43:24 +02:00			`deleted_file = open(ROOT_DIR+"Deleted_Comics.txt", "wb")`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
			`for issue_id in non_retrieved_comics.keys():`
updated to python3 2022-10-05 16:31:47 +02:00			`deleted_file.write((str(issue_id)+"\n").encode())`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`deleted_comics_cont += 1`

			`deleted_file.close()`

updated to python3 2022-10-05 16:31:47 +02:00			`print("py: Writing database to file")`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
Updated user agent 2024-11-12 16:49:10 +01:00			`outfile = io.open(out_file,"w",encoding="utf8")`
			`outfile.write(("Missing;" + end_date + "\n"))`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
updated to python3 2022-10-05 16:31:47 +02:00			`for volume_id in sorted(comics):`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`issues = ""`
			`nums = ""`
updated to python3 2022-10-05 16:31:47 +02:00			`for issue_id in sorted(comics[volume_id]):`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00			`issues += str(issue_id) + ","`
			`nums += comics[volume_id][issue_id] + ","`
			`issues = issues[:-1]`
Updated user agent 2024-11-12 16:49:10 +01:00			`outfile.write((str(volume_id) + ";" + issues + ";" + nums + "\n"))`
Added UpdateMissing script 2022-07-08 10:29:37 +02:00
			`outfile.close()`

updated to python3 2022-10-05 16:31:47 +02:00			`print("py: Done! " + str(new_comics_cont) + " comics added to database! (" + str(comic_skip_cont)+ " skipped and " + str(old_comics_cont) + " comics already in database)")`
			`print("py: " + str(deleted_comics_cont) + " comics in databased not retrieved in this round.")`
			`print("py: " + str(updated_comics_cont) + " comics updated in database.")`
			`print("py: Ids with error in server: " + ErrorIds[1:])`
			`print("py: " + str(cont))`
updated bash and python script 2022-07-15 09:21:05 +02:00			`#raw_input("Press Enter to continue...")`