Added UpdateMissing script

2022-07-08 10:29:37 +02:00
parent 3de3d1e564
commit 7fe9c3e500
9 changed files with 251644 additions and 0 deletions
@@ -0,0 +1,11 @@
+@echo off
+rem Template launcher for update_missing.py.
+rem The api key placeholder on the last line must be replaced with a real
+rem ComicVine API key before this script is run.
+rem The current date value is echoed first so its layout can be checked
+rem against the substring offsets used on the last line.
+echo Date format = %date%
+setlocal enabledelayedexpansion
+rem Scan the .mcl files in the current directory and keep the greatest
+rem base name (file name without extension) in cnt.
+set cnt=0
+for %%i in (*.mcl) do (
+    set "num=%%~ni"
+    if !num! GTR !cnt! set cnt=!num!
+)
+set /a olddate=cnt
+rem NOTE(review): this is plain integer arithmetic on the file name, not a
+rem calendar step, so 20220701 becomes 20220700 - confirm this is acceptable.
+set /a olddate2=cnt-1
+rem Arguments: in_file out_file api_key start_date end_date.
+rem The date substrings take characters 6-9, 3-4 and 0-1 as year, month and
+rem day, which assumes a day-month-year layout with one-character separators.
+py -2 update_missing.py %olddate%.mcl %date:~6,4%%date:~3,2%%date:~0,2%.mcl <api key> %olddate2:~0,4%-%olddate2:~4,2%-%olddate2:~6,2% %date:~6,4%-%date:~3,2%-%date:~0,2%
@@ -0,0 +1,11 @@
+@echo off
+rem Launcher for update_missing.py.
+rem The ComicVine API key is read from the COMICVINE_API_KEY environment
+rem variable so that the secret is never stored in version control.
+rem The current date value is echoed first so its layout can be checked
+rem against the substring offsets used on the last line.
+echo Date format = %date%
+setlocal enabledelayedexpansion
+if not defined COMICVINE_API_KEY (
+    >&2 echo Set the COMICVINE_API_KEY environment variable to your ComicVine API key first.
+    exit /b 1
+)
+rem Scan the .mcl files in the current directory and keep the greatest
+rem base name (file name without extension) in cnt.
+set cnt=0
+for %%i in (*.mcl) do (
+    set "num=%%~ni"
+    if !num! GTR !cnt! set cnt=!num!
+)
+set /a olddate=cnt
+rem NOTE(review): this is plain integer arithmetic on the file name, not a
+rem calendar step, so 20220701 becomes 20220700 - confirm this is acceptable.
+set /a olddate2=cnt-1
+rem Arguments: in_file out_file api_key start_date end_date.
+rem The date substrings take characters 6-9, 3-4 and 0-1 as year, month and
+rem day, which assumes a day-month-year layout with one-character separators.
+py -2 update_missing.py %olddate%.mcl %date:~6,4%%date:~3,2%%date:~0,2%.mcl "%COMICVINE_API_KEY%" %olddate2:~0,4%-%olddate2:~4,2%-%olddate2:~6,2% %date:~6,4%-%date:~3,2%-%date:~0,2%
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# Template launcher for update_missing.py.
+#
+# Replace the <API KEY> placeholder below with a real ComicVine API key before
+# running. The run starts from the date of the newest *_latest.mcl snapshot in
+# the current directory and ends at today's date.
+
+d=$(date +%Y%m%d)
+
+echo "Todays date $d"
+
+#mv "missing.mcl" "missing_old.mcl"
+
+# Every run leaves one more <date>_latest.mcl behind, so several files may
+# match the glob; sort the extracted dates and keep only the newest one.
+fromdate=$(ls -1 *_latest.mcl | grep -Eo '[[:digit:]]{8}' | sort | tail -n 1)
+
+if [ -z "$fromdate" ]; then
+    echo "No *_latest.mcl file found in $(pwd)" >&2
+    exit 1
+fi
+
+echo "Last date run $fromdate"
+
+
+# Arguments: in_file out_file api_key start_date end_date
+python2 "update_missing.py" "${fromdate}_latest.mcl" "${d}.mcl" <API KEY> "$fromdate" "$d"
+
+# Promote today's output to be the starting snapshot of the next run.
+cp "${d}.mcl" "${d}_latest.mcl"
+
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# Launcher for update_missing.py.
+#
+# The ComicVine API key is read from the COMICVINE_API_KEY environment variable
+# so that the secret is never stored in version control. The run starts from
+# the date of the newest *_latest.mcl snapshot in the current directory and
+# ends at today's date.
+
+# Abort with a message when the key is unset or empty.
+: "${COMICVINE_API_KEY:?Set COMICVINE_API_KEY to your ComicVine API key}"
+
+d=$(date +%Y%m%d)
+
+echo "Todays date $d"
+
+#mv "missing.mcl" "missing_old.mcl"
+
+# Every run leaves one more <date>_latest.mcl behind, so several files may
+# match the glob; sort the extracted dates and keep only the newest one.
+fromdate=$(ls -1 *_latest.mcl | grep -Eo '[[:digit:]]{8}' | sort | tail -n 1)
+
+if [ -z "$fromdate" ]; then
+    echo "No *_latest.mcl file found in $(pwd)" >&2
+    exit 1
+fi
+
+echo "Last date run $fromdate"
+
+
+# Arguments: in_file out_file api_key start_date end_date
+py -2 "update_missing.py" "${fromdate}_latest.mcl" "${d}.mcl" "$COMICVINE_API_KEY" "$fromdate" "$d"
+
+# Promote today's output to be the starting snapshot of the next run.
+cp "${d}.mcl" "${d}_latest.mcl"
+
@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+
+"""
+    Ver 2.0
+    
+    Overview:
+    
+    This script grabs the newest issues from ComicVine and 'appends' them to the
+    missing.mcl file contents.
+    
+    usage:
+    python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>
+    
+        in_file:    missing.mcl (i.e., your most recent version)
+        out_file:    missing_<date> (or whatever you want to name it)
+        api_key:    provided by ComicVine
+        start_date:    the date the mcl file is synched with (YYYY-MM-DD)
+        end_date:    today's date (YYYY-MM-DD)
+    
+    e.g.,
+    python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17
+    
+    Technical stuff:
+    
+    The mcl file format contains a header followed by a list of volumes with 
+    their respective issues/numbers.
+    
+        Missing;<date_of_last_update>
+        <volume_id>;list of <issue_id>;list of <issue_num>
+        <volume_id>;list of <issue_id>;list of <issue_num>
+        ...
+        <volume_id>;list of <issue_id>;list of <issue_num>
+    
+    The lists are comma delimited.  Commas followed immediately by a space are 
+    not considered a delimiter.  Some issues are numbered like "v. 1, no. 01".
+    If there is a space in the list of issue numbers, the entire list is 
+    wrapped in double quotes.
+    
+    Note: There is one volume (id: 77901) that has an issue number "1,5".  This
+    can potentially wreak some havoc if not treated carefully.    
+"""
+
+import requests
+import sys
+
+if len(sys.argv) < 6 :
+    print ("usage: python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>")
+    exit()
+
+in_file = str(sys.argv[1])        # missing.mcl
+out_file = str(sys.argv[2])     # updated_missing.mcl
+api_key = str(sys.argv[3])         # ComicVine API key
+start_date = str(sys.argv[4])     # start date range to search for new issues
+end_date = str(sys.argv[5])        # end date range to search for new issues
+
+comiclist = open(in_file, "r")
+issues_number = {}
+issues_volume = {}
+skip_header = True
+cont = 0
+
+print ("Reading in current database")
+for line in comiclist:
+    if skip_header:
+        skip_header = False
+        continue
+    
+    line_split = unicode(line, encoding='utf-8').replace("\n","").split(";")
+    volume_id = int(line_split[0])
+    
+    if (line_split[1][0] == '"') and (line_split[1][len(line_split[1])] == '"'):
+        line_split = line_split[1:-1]
+    
+    issue_split = line_split[1].split(",")
+    num_split = line_split[2].split(",")
+    
+    for i in range(0,len(issue_split)):
+        if issues_number.has_key(int(issue_split[i])):
+            cont += 1
+        issues_number[int(issue_split[i])] = num_split[i]
+        issues_volume[int(issue_split[i])] = volume_id
+
+comiclist.close()
+
+print ("Querying ComicVine for new issues")
+headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
+new_comics_cont = 0
+old_comics_cont = 0
+updated_comics_cont = 0
+deleted_comics_cont = 0
+comic_skip_cont = 0
+offset = 0
+max = 100
+limit = 100
+skip = 0
+retry = 0
+ErrorIds = ""
+non_retrieved_comics = issues_number.copy()
+FindingError = False
+
+while offset < max:
+    try:
+        request_url = "https://comicvine.gamespot.com/api/issues/?api_key=" + api_key +"&limit=" + str(limit) + "&format=json&offset=" + str(offset) + "&field_list=id,issue_number,volume&filter=date_last_updated:" + start_date + "|" + end_date + "&sort=id"
+        
+        '''print request_url'''
+        r = requests.get(request_url, headers=headers)
+        json_obj = r.json()
+
+        max = json_obj['number_of_total_results']
+       
+        print (str(min(offset,max)) + "/" + str(max) + " Since " + start_date)
+        
+        for i in json_obj['results']:
+            volume_id = i['volume']['id']
+            issue_id = i['id']
+            num = unicode(i['issue_number']).replace(",",".&@1").replace(";",".&@2").replace("\n","").replace("\r","")
+            
+            if not issues_number.has_key(issue_id):
+                new_comics_cont += 1
+                issues_number[issue_id] = num
+                issues_volume[issue_id] = volume_id
+            else:
+                del non_retrieved_comics[issue_id]
+                old_comics_cont += 1
+                if issues_number[issue_id] != num or issues_volume[issue_id] != volume_id:
+                    updated_comics_cont += 1
+                    issues_number[issue_id] = num
+                    issues_volume[issue_id] = volume_id
+
+        offset += limit + skip
+        
+        FindingError = False
+
+        if skip == 1:
+            print ("Comic with error found, id= " + str(issue_id+1))
+            ErrorIds += ";"+ str(issue_id+1)
+            comic_skip_cont += 1
+            print ("Continue loading comics now...")
+            FindingError = True
+        
+        skip = 0
+        limit = 100
+        retry = 0
+        
+    except:
+        if retry < 4 and not FindingError:
+            print ("Error. Trying Again...")
+            retry += 1
+        else:
+            
+            if not FindingError:
+            
+                print ("Finding Error in comic list: " + str(100-limit) + "%")
+                skip = 1
+                limit -= 1
+            
+            if limit == 0 or FindingError:
+                print ("Comic with error found, id= " + str(issue_id+offset))
+                FindingError = True
+                limit = 1
+                offset += 1
+                comic_skip_cont += 1
+                ErrorIds += ";"+ str(issue_id+offset)
+
+comics = {}
+for issue_id in issues_number.keys():
+    if not comics.has_key(issues_volume[issue_id]):
+        comics[issues_volume[issue_id]] = {}
+    comics[issues_volume[issue_id]][issue_id]=issues_number[issue_id]
+
+print ("Writing missings to file")
+
+deleted_file = open("Deleted_Comics.txt", "wb")
+
+for issue_id in non_retrieved_comics.keys():
+    deleted_file.write(str(issue_id)+"\n")
+    deleted_comics_cont += 1
+
+deleted_file.close()
+    
+print ("Writing database to file")
+
+outfile = open(out_file,"wb")
+outfile.write("Missing;" + end_date + "\n")
+
+for volume_id in sorted(comics.iterkeys()):
+    issues = ""
+    nums = ""
+    for issue_id in sorted(comics[volume_id].iterkeys()):
+        issues += str(issue_id) + ","
+        nums += comics[volume_id][issue_id] + ","
+    issues = issues[:-1]
+    outfile.write(str(volume_id) + ";" + issues + ";" + nums.encode('utf-8','ignore') + "\n")
+    
+outfile.close()
+
+print ("Done! " + str(new_comics_cont) + " comics added to database! (" + str(comic_skip_cont)+ " skipped and " + str(old_comics_cont) + " comics already in database)" )
+print (str(deleted_comics_cont) + " comics in databased not retrieved in this round.")
+print (str(updated_comics_cont) + " comics updated in database.")
+print ("Ids with error in server: " + ErrorIds[1:])
+print (cont)
+raw_input("Press Enter to continue...")