From d296cf0f97b59cfc45da21ec808981b41397b752 Mon Sep 17 00:00:00 2001 From: Frederik Baerentsen Date: Wed, 24 Jun 2020 08:41:55 +0200 Subject: [PATCH] Fixed errors --- lego_dl.sh | 78 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 13 deletions(-) diff --git a/lego_dl.sh b/lego_dl.sh index a92ae7d..a680b76 100644 --- a/lego_dl.sh +++ b/lego_dl.sh @@ -1,17 +1,22 @@ #!/bin/bash +# Get themes.csv and sets.csv from https://rebrickable.com/downloads + + echo "----> Starting Download of all LEGO instructions from https://brickset.com/exportscripts/instructions" firstline=0 logfile="lego_errors.log" -downloadFolder="Instructions" +downloadFolder="../Instructions" #error_level: # 0 no errors reported # 1 Download errors reported # 2 Existing files and download errors reported - -error_level=1 - +error_level=0 echo "" > $logfile + +THEMES="themes.csv" +SETS="sets.csv" + if [ -f "instructions" ]; then echo "---> Using existing instructions file. Manually delete it to redownload..." else @@ -24,19 +29,63 @@ else echo "Done!" fi fi -while IFS=, read -r ID LINK NAME DESC ADDED RETRIVED + +echo "----> Converting csv with ',' to '|'..." 
+head -1 instructions | sed 's/,/|/g' > instructions.csv +sed 1,1d instructions | sed -r 's/("[^",]+),([^",]+")/\1###\2/g' | awk -F, '{print $1,$2,$3,$4,$5,$6}' | sed 's/###/,/g' | sed 's/" "/"|"/g' >> instructions.csv + +while IFS='|' read -r ID LINK NAME DESC ADDED RETRIVED do + + #echo $ID $LINK $NAME $DESC $ADDED $RETRIVED + if [ "$firstline" = 0 ]; then firstline=1 else #echo "I got:$ID - $LINK - $NAME - $DESC - $ADDED - $RETRIVED" - + tID=$(sed -e 's/^"//' -e 's/"$//' <<<"$ID") tLINK=$(sed -e 's/^"//' -e 's/"$//' <<<"$LINK") - tNAME=$(sed -e 's/^"//' -e 's/"$//' <<<"$NAME") + #tNAME=$(sed -e 's/^"//' -e 's/"$//' <<<"$NAME") + ttNAME=$(cut -d, -f1-2 sets.csv | grep -w $tID | cut -d, -f2) + setYEAR=$(grep -w $tID sets.csv | cut -d, -f3) + setNAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $ttNAME) + themeID=$(grep -w $tID sets.csv | cut -d, -f4) + themeName=$(awk -F',' -v id="$themeID" '$1 == id' themes.csv) + IFS=',' read -r -a array <<< "$themeName" + tempID=${array[2]} + if [[ $tempID != "" ]]; then + while [[ $tempID != "" ]] + do + #echo "tempID: $tempID" + tthemeName=$(awk -F',' -v id="$tempID" '$1 == id' themes.csv) + #echo $tthemeName + IFS=',' read -r -a tArray <<< "$tthemeName" + #echo "<${tArray[2]}>" + tempID=${tArray[2]} + themeName=${tArray[1]} + done + else + themeName=${array[1]} + fi + + setTHEME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $themeName) + + #echo $setYEAR + tADDED=$(sed -e 's/^"//' -e 's/"$//' <<<"$ADDED") tDESC=$(sed -e 's/^"//' -e 's/"$//' <<<"$DESC") - filename=$tID"_"${tNAME// /_}"_("${tDESC// /_}")_"$tADDED".pdf" + ttDESC=$(echo $tDESC | grep -Eo '\s[0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/ //g' | sed 's/\//_of_/g') + + if [ -z "$ttDESC" ]; then + setDESC="_" + else + setDESC="_("$ttDESC")_" + fi + + tFilename=""$tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" + filename=$tFilename + if [ -f "$downloadFolder/$filename" ]; then echo "-> $tID exists. Skipping..." 
if [[ $error_level = 2 ]]; then @@ -49,22 +98,25 @@ do echo "$filename is not available." >> $logfile fi else + #echo "___ URL $tLINK" echo -ne "--> $tID downloading now..." - curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" $tLINK --silent --output "Instructions/$filename" + curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename" + #wget -T20 --tries=3 --retry-connrefused --continue -O "Instructions/$filename" $tLINK if [ -f "$downloadFolder/$filename" ]; then - echo "Done!" + echo "Done! > $filename" else echo "ERROR!" - echo "--> Not downloaded. Try again manually..." if [[ $error_level = 1 || $error_level = 2 ]]; then + echo + echo "--> Not downloaded. Try again manually..." echo "$filename was not downloaded. Check CURL" >> $logfile fi fi #random sleep in order to not look like a script - sleep $(( ( RANDOM % 10 ) + 1 )) + sleep $(( ( RANDOM % 5 ) + 1 )) fi fi fi -done < instructions +done < instructions.csv