From 7fe9c3e5008d2052bbc10769e6e988e57261caf5 Mon Sep 17 00:00:00 2001 From: Frederik Baerentsen Date: Fri, 8 Jul 2022 10:29:37 +0200 Subject: [PATCH] Added UpdateMissing script --- Update Missing/.update.bat.un~ | Bin 0 -> 1510 bytes Update Missing/.update.sh.un~ | Bin 0 -> 1795 bytes Update Missing/20220404.mcl | 125693 ++++++++++++++++++++++++++ Update Missing/20220404_latest.mcl | 125693 ++++++++++++++++++++++++++ Update Missing/update.bat | 11 + Update Missing/update.bat~ | 11 + Update Missing/update.sh | 17 + Update Missing/update.sh~ | 17 + Update Missing/update_missing.py | 202 + 9 files changed, 251644 insertions(+) create mode 100644 Update Missing/.update.bat.un~ create mode 100644 Update Missing/.update.sh.un~ create mode 100644 Update Missing/20220404.mcl create mode 100644 Update Missing/20220404_latest.mcl create mode 100644 Update Missing/update.bat create mode 100644 Update Missing/update.bat~ create mode 100644 Update Missing/update.sh create mode 100644 Update Missing/update.sh~ create mode 100644 Update Missing/update_missing.py diff --git a/Update Missing/.update.bat.un~ b/Update Missing/.update.bat.un~ new file mode 100644 index 0000000000000000000000000000000000000000..320dbb4177df3a6606a38f94b398880e0bb7a15d GIT binary patch literal 1510 zcmWH`%$*;a=aT=FfoX~>-?7Eo<^4TZtdDnd*HG-Q-EFf7%hWHC0 zSs;KZ1hGL@zDfYi=R&dr6cxiKahf)Q(;LWB m$N-e~M$;Q|h|rV`Om9b_*&l0qTSU#m(8i7yrJQNw^Hl(e>(?y+ literal 0 HcmV?d00001 diff --git a/Update Missing/.update.sh.un~ b/Update Missing/.update.sh.un~ new file mode 100644 index 0000000000000000000000000000000000000000..e139276414f467fa78b9d2dfc09d6f9ccaef475c GIT binary patch literal 1795 zcmWH`%$*;a=aT=FfyuH%Nwu?q@u;=uQu|YUj>{L`&E2Ihw_eq33YU{};{6*83=D!m zTwGA8plhU{R9cXdSdto_n^|0(nU}6tP^qM#q>@&Yp9>OJQi{(3QpF{Dxyd;oi4-Lz z2-DKs(8AK#IN2c4I5E-0#K1ho%+k^{#URZ%DaqK-Ak8#2)zaKN%}4=mmV!zO&{{qq zW(8tKAZ7+)5CG9I%#d{a)e07nEF(kw1&}NdFhM9V36g>t!tfsmz(ye{7-dp31c1>o z7aSc73|}RH2J-?jKM;eG5GX1JA{m)Bf>Q=^5(2seoJv3`!vVw@O&Oq2pd|pNj79iT 
zMrlE1Nk)Dior@S~+CVNbK!E@%UO;KX5yTly8=z32B><+4< %olddate2:~0,4%-%olddate2:~4,2%-%olddate2:~6,2% %date:~6,4%-%date:~3,2%-%date:~0,2% diff --git a/Update Missing/update.bat~ b/Update Missing/update.bat~ new file mode 100644 index 0000000..466333e --- /dev/null +++ b/Update Missing/update.bat~ @@ -0,0 +1,11 @@ +@echo off +echo Date format = %date% +setlocal enabledelayedexpansion +set cnt=0 +for %%i in (*.mcl) do ( + set "num=%%~ni" + if !num! GTR !cnt! set cnt=!num! +) +set /a olddate=cnt +set /a olddate2=cnt-1 +py -2 update_missing.py %olddate%.mcl %date:~6,4%%date:~3,2%%date:~0,2%.mcl 9718933c0a3aa4407d6995d0f3bb310f5ee977f2 %olddate2:~0,4%-%olddate2:~4,2%-%olddate2:~6,2% %date:~6,4%-%date:~3,2%-%date:~0,2% \ No newline at end of file diff --git a/Update Missing/update.sh b/Update Missing/update.sh new file mode 100644 index 0000000..8cb8307 --- /dev/null +++ b/Update Missing/update.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +d=`date +%Y%m%d` + +echo "Todays date $d" + +#mv "missing.mcl" "missing_old.mcl" + +fromdate=$(ls -1 *_latest.mcl |grep -Eo '[[:digit:]]{8}') + +echo "Last date run $fromdate" + + +python2 "update_missing.py" "$fromdate""_latest.mcl" "$d"".mcl" $fromdate $d + +cp "$d"".mcl" "$d""_latest.mcl" + diff --git a/Update Missing/update.sh~ b/Update Missing/update.sh~ new file mode 100644 index 0000000..983d96b --- /dev/null +++ b/Update Missing/update.sh~ @@ -0,0 +1,17 @@ +#!/bin/bash + +d=`date +%Y%m%d` + +echo "Todays date $d" + +#mv "missing.mcl" "missing_old.mcl" + +fromdate=$(ls -1 *_latest.mcl |grep -Eo '[[:digit:]]{8}') + +echo "Last date run $fromdate" + + +py -2 "update_missing.py" "$fromdate""_latest.mcl" "$d"".mcl" 9718933c0a3aa4407d6995d0f3bb310f5ee977f2 $fromdate $d + +cp "$d"".mcl" "$d""_latest.mcl" + diff --git a/Update Missing/update_missing.py b/Update Missing/update_missing.py new file mode 100644 index 0000000..3528b71 --- /dev/null +++ b/Update Missing/update_missing.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python + +""" + Ver 2.0 + + 
    Overview:

    This script grabs the newest issues from ComicVine and 'appends' them to the
    missing.mcl file contents.

    usage:
        python update_missing.py <in_file> <out_file> <api_key> <start_date> <end_date>

        in_file:    missing.mcl (i.e., your most recent version)
        out_file:   missing_<date>.mcl (or whatever you want to name it)
        api_key:    provided by ComicVine
        start_date: the date the mcl file is synced with (YYYY-MM-DD)
        end_date:   today's date (YYYY-MM-DD)

        e.g.,
        python update_missing.py missing.mcl missing_20170917.mcl API_KEY 2017-09-11 2017-09-17

    Technical stuff:

    The mcl file format contains a header followed by a list of volumes with
    their respective issues/numbers.

        Missing;<date>
        <volume_id>;list of <issue_ids>;list of <issue_numbers>
        <volume_id>;list of <issue_ids>;list of <issue_numbers>
        ...
        <volume_id>;list of <issue_ids>;list of <issue_numbers>

    The lists are comma delimited. Commas followed immediately by a space are
    not considered a delimiter. Some issues are numbered like "v. 1, no. 01".
    If there is a space in the list of issue numbers, the entire list is
    wrapped in double quotes.

    Note: There is one volume (id: 77901) that has an issue number "1,5". This
    can potentially wreak some havoc if not treated carefully.
+""" + +import requests +import sys + +if len(sys.argv) < 6 : + print ("usage: python update_missing.py ") + exit() + +in_file = str(sys.argv[1]) # missing.mcl +out_file = str(sys.argv[2]) # updated_missing.mcl +api_key = str(sys.argv[3]) # ComicVine API key +start_date = str(sys.argv[4]) # start date range to search for new issues +end_date = str(sys.argv[5]) # end date range to search for new issues + +comiclist = open(in_file, "r") +issues_number = {} +issues_volume = {} +skip_header = True +cont = 0 + +print ("Reading in current database") +for line in comiclist: + if skip_header: + skip_header = False + continue + + line_split = unicode(line, encoding='utf-8').replace("\n","").split(";") + volume_id = int(line_split[0]) + + if (line_split[1][0] == '"') and (line_split[1][len(line_split[1])] == '"'): + line_split = line_split[1:-1] + + issue_split = line_split[1].split(",") + num_split = line_split[2].split(",") + + for i in range(0,len(issue_split)): + if issues_number.has_key(int(issue_split[i])): + cont += 1 + issues_number[int(issue_split[i])] = num_split[i] + issues_volume[int(issue_split[i])] = volume_id + +comiclist.close() + +print ("Querying ComicVine for new issues") +headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'} +new_comics_cont = 0 +old_comics_cont = 0 +updated_comics_cont = 0 +deleted_comics_cont = 0 +comic_skip_cont = 0 +offset = 0 +max = 100 +limit = 100 +skip = 0 +retry = 0 +ErrorIds = "" +non_retrieved_comics = issues_number.copy() +FindingError = False + +while offset < max: + try: + request_url = "https://comicvine.gamespot.com/api/issues/?api_key=" + api_key +"&limit=" + str(limit) + "&format=json&offset=" + str(offset) + "&field_list=id,issue_number,volume&filter=date_last_updated:" + start_date + "|" + end_date + "&sort=id" + + '''print request_url''' + r = requests.get(request_url, headers=headers) + json_obj = r.json() + + max = 
json_obj['number_of_total_results'] + + print (str(min(offset,max)) + "/" + str(max) + " Since " + start_date) + + for i in json_obj['results']: + volume_id = i['volume']['id'] + issue_id = i['id'] + num = unicode(i['issue_number']).replace(",",".&@1").replace(";",".&@2").replace("\n","").replace("\r","") + + if not issues_number.has_key(issue_id): + new_comics_cont += 1 + issues_number[issue_id] = num + issues_volume[issue_id] = volume_id + else: + del non_retrieved_comics[issue_id] + old_comics_cont += 1 + if issues_number[issue_id] != num or issues_volume[issue_id] != volume_id: + updated_comics_cont += 1 + issues_number[issue_id] = num + issues_volume[issue_id] = volume_id + + offset += limit + skip + + FindingError = False + + if skip == 1: + print ("Comic with error found, id= " + str(issue_id+1)) + ErrorIds += ";"+ str(issue_id+1) + comic_skip_cont += 1 + print ("Continue loading comics now...") + FindingError = True + + skip = 0 + limit = 100 + retry = 0 + + except: + if retry < 4 and not FindingError: + print ("Error. 
Trying Again...") + retry += 1 + else: + + if not FindingError: + + print ("Finding Error in comic list: " + str(100-limit) + "%") + skip = 1 + limit -= 1 + + if limit == 0 or FindingError: + print ("Comic with error found, id= " + str(issue_id+offset)) + FindingError = True + limit = 1 + offset += 1 + comic_skip_cont += 1 + ErrorIds += ";"+ str(issue_id+offset) + +comics = {} +for issue_id in issues_number.keys(): + if not comics.has_key(issues_volume[issue_id]): + comics[issues_volume[issue_id]] = {} + comics[issues_volume[issue_id]][issue_id]=issues_number[issue_id] + +print ("Writing missings to file") + +deleted_file = open("Deleted_Comics.txt", "wb") + +for issue_id in non_retrieved_comics.keys(): + deleted_file.write(str(issue_id)+"\n") + deleted_comics_cont += 1 + +deleted_file.close() + +print ("Writing database to file") + +outfile = open(out_file,"wb") +outfile.write("Missing;" + end_date + "\n") + +for volume_id in sorted(comics.iterkeys()): + issues = "" + nums = "" + for issue_id in sorted(comics[volume_id].iterkeys()): + issues += str(issue_id) + "," + nums += comics[volume_id][issue_id] + "," + issues = issues[:-1] + outfile.write(str(volume_id) + ";" + issues + ";" + nums.encode('utf-8','ignore') + "\n") + +outfile.close() + +print ("Done! " + str(new_comics_cont) + " comics added to database! (" + str(comic_skip_cont)+ " skipped and " + str(old_comics_cont) + " comics already in database)" ) +print (str(deleted_comics_cont) + " comics in databased not retrieved in this round.") +print (str(updated_comics_cont) + " comics updated in database.") +print ("Ids with error in server: " + ErrorIds[1:]) +print (cont) +raw_input("Press Enter to continue...")