From ec88a6914fe7b2ab609a1db3a6438461e7516f75 Mon Sep 17 00:00:00 2001 From: Frederik Baerentsen Date: Wed, 24 Jun 2020 10:18:54 +0200 Subject: [PATCH] Fixed errors --- lego_dl.sh | 235 +++++++++++++++++++++++++++-------------------------- 1 file changed, 121 insertions(+), 114 deletions(-) diff --git a/lego_dl.sh b/lego_dl.sh index 1f0359a..df2a7b3 100644 --- a/lego_dl.sh +++ b/lego_dl.sh @@ -1,131 +1,138 @@ -#!/bin/bash -############################## -# -# NOTE: There is still some small errors with sets with multiple booklets -# -# This scripts uses the brickset instructions file to get links and set numbers. -# In order to get themes, we get themes from rebrickable. -# -# Files are saved as: -# $tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" -# 1190-1_Retro_Buggy_(Town_1999).pdf -# -# If a set contains multiple filesm the file name will be: -# 2520-1_(1_of_2)_Battle_Arena_(Ninjago_2011).pdf -# 2520-1_(2_of_2)_Battle_Arena_(Ninjago_2011).pdf -# -# Req: -# bash, awk, grep, sed, curl, wget -# -# Get themes.csv and sets.cvs from https://rebrickable.com/downloads -# -# error_level: -# 0 no errors reported -# 1 Download errors reported -# 2 Existing files and download errors reported -# -############################## + #!/bin/bash + ############################## + # + # NOTE: There is still some small errors with sets with multiple booklets + # + # This scripts uses the brickset instructions file to get links and set numbers. + # In order to get themes, we get themes from rebrickable. + # + # Files are saved as: + # $tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" + # 1190-1_Retro_Buggy_(Town_1999).pdf + # + # If a set contains multiple filesm the file name will be: + # 2520-1_(1_of_2)_Battle_Arena_(Ninjago_2011).pdf + # 2520-1_(2_of_2)_Battle_Arena_(Ninjago_2011).pdf + # + # Req: + # bash, awk, grep, sed, curl, wget + # + # Get themes.csv and sets.cvs from https://rebrickable.com/downloads + # + # error_level: + # 0 no errors reported + # 1 Download errors reported + # 2 Existing files and download errors reported + # + ############################## -echo "----> Starting Download of all LEGO instructions from https://brickset.com/exportscripts/instructions" -firstline=0 + echo "----> Starting Download of all LEGO instructions from https://brickset.com/exportscripts/instructions" + firstline=0 -##### CHANGE HERE ##### -logfile="lego_errors.log" -downloadFolder="../Instructions" -error_level=1 -##### STOP CHANGE ##### + ##### CHANGE HERE ##### + logfile="lego_errors.log" + downloadFolder="../Instructions" + error_level=1 + ##### STOP CHANGE ##### -echo "" > $logfile + echo "" > $logfile -THEMES="themes.csv" -SETS="sets.csv" + THEMES="themes.csv" + SETS="sets.csv" -if [ -f "instructions" ]; then - echo "---> Using existing instructions file. Manually delete it to redownload..." -else - echo -ne "---> Downloading csv from Brickset..." - wget https://brickset.com/exportscripts/instructions &> /dev/null - if [[ "$?" != 0 ]]; then - echo "Error... Exiting!" - exit; - else - echo "Done!" - fi -fi + if [ -f "instructions" ]; then + echo "---> Using existing instructions file. Manually delete it to redownload..." + else + echo -ne "---> Downloading csv from Brickset..." + wget https://brickset.com/exportscripts/instructions &> /dev/null + if [[ "$?" != 0 ]]; then + echo "Error... Exiting!" + exit; + else + echo "Done!" + fi + fi -echo "----> Converting csv with ',' to '|'..." -head -1 instructions | sed 's/,/|/g' > instructions.csv -sed 1,1d instructions | sed -r 's/("[^",]+),([^",]+")/\1###\2/g' | awk -F, '{print $1,$2,$3,$4,$5,$6}' | sed 's/###/,/g' | sed 's/" "/"|"/g' >> instructions.csv -while IFS='|' read -r ID LINK NAME DESC ADDED RETRIVED -do + echo "----> Converting csv with ',' to '|'..." + head -1 instructions | sed 's/,/|/g' > instructions.csv + sed 1,1d instructions | sed -r 's/("[^",]+),([^",]+")/\1###\2/g' | awk -F, '{print $1,$2,$3,$4,$5,$6}' | sed 's/###/,/g' | sed 's/" "/"|"/g' >> instructions.csv - if [ "$firstline" = 0 ]; then - firstline=1 - else - tID=$(sed -e 's/^"//' -e 's/"$//' <<<"$ID") - tLINK=$(sed -e 's/^"//' -e 's/"$//' <<<"$LINK") - #tNAME=$(sed -e 's/^"//' -e 's/"$//' <<<"$NAME") - ttNAME=$(cut -d, -f1-2 sets.csv | grep -w $tID | cut -d, -f2) - setYEAR=$(grep -w $tID sets.csv | cut -d, -f3) - setNAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $ttNAME) - themeID=$(grep -w $tID sets.csv | cut -d, -f4) - themeName=$(awk -F',' -v id="$themeID" '$1 == id' themes.csv) - IFS=',' read -r -a array <<< "$themeName" - tempID=${array[2]} - if [[ $tempID != "" ]]; then - while [[ $tempID != "" ]] - do - tthemeName=$(awk -F',' -v id="$tempID" '$1 == id' themes.csv) - IFS=',' read -r -a tArray <<< "$tthemeName" - tempID=${tArray[2]} - themeName=${tArray[1]} - done - else - themeName=${array[1]} - fi + LINES=$(wc -l instructions | awk '{ print $1 }') + CURRENT=0 - setTHEME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $themeName) - - tADDED=$(sed -e 's/^"//' -e 's/"$//' <<<"$ADDED") - tDESC=$(sed -e 's/^"//' -e 's/"$//' <<<"$DESC") - ttDESC=$(echo $tDESC | grep -Eo '\s[0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/ //g' | sed 's/\//_of_/g') - - if [ -z "$ttDESC" ]; then - setDESC="_" - else - setDESC="_("$ttDESC")_" - fi + while IFS='|' read -r ID LINK NAME DESC ADDED RETRIVED + do - tFilename=""$tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" - filename=$tFilename + #$PC=$((200*$CURRENT/$LINES % 2 + 100*$CURRENT/$LINES)) + #CURRENT=$((CURRENT + 1)) + if [ "$firstline" = 0 ]; then + firstline=1 + else + tID=$(sed -e 's/^"//' -e 's/"$//' <<<"$ID") + tLINK=$(sed -e 's/^"//' -e 's/"$//' <<<"$LINK") + #tNAME=$(sed -e 's/^"//' -e 's/"$//' <<<"$NAME") + ttNAME=$(cut -d, -f1-2 sets.csv | grep -w $tID | cut -d, -f2) + setYEAR=$(grep -w $tID sets.csv | cut -d, -f3) + setNAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $ttNAME) + themeID=$(grep -w $tID sets.csv | cut -d, -f4) + themeName=$(awk -F',' -v id="$themeID" '$1 == id' themes.csv) + IFS=',' read -r -a array <<< "$themeName" + tempID=${array[2]} + if [[ $tempID != "" ]]; then + while [[ $tempID != "" ]] + do + tthemeName=$(awk -F',' -v id="$tempID" '$1 == id' themes.csv) + IFS=',' read -r -a tArray <<< "$tthemeName" + tempID=${tArray[2]} + themeName=${tArray[1]} + done + else + themeName=${array[1]} + fi - if [ -f "$downloadFolder/$filename" ]; then - if [[ $error_level = 2 ]]; then - echo "-> $tID exists. Skipping..." - echo "$filename exists." >> $logfile - fi - else - if [[ "$tDESC" = "{No longer listed at LEGO.com}" ]] ; then - if [[ $error_level = 1 || $error_level = 2 ]]; then - echo "-> $tID is not available. Skipping..." - echo "$filename is not available." >> $logfile - fi - else - echo -ne "--> $tID downloading now..." - curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename" - if [ -f "$downloadFolder/$filename" ]; then - echo "Done! > $filename" - else - echo "ERROR!" - if [[ $error_level = 1 || $error_level = 2 ]]; then - echo - echo "--> Not downloaded. Try again manually..." + setTHEME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $themeName) + + tADDED=$(sed -e 's/^"//' -e 's/"$//' <<<"$ADDED") + tDESC=$(sed -e 's/^"//' -e 's/"$//' <<<"$DESC") + + ttDESC=$(echo $tDESC | grep -Eo '[^0-9][0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/[^A-Za-z0-9/]//g' | sed 's/\//_of_/g') + #ttDESC=$(echo $tDESC | grep -Eo '\s[0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/ //g' | sed 's/\//_of_/g') + + if [ -z "$ttDESC" ]; then + setDESC="_" + else + setDESC="_("$ttDESC")_" + fi + #PC=$(echo $CURRENT $LINES | awk '{print 100*$1/$2}') + tFilename=""$tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" + filename=$tFilename + + if [ -f "$downloadFolder/$filename" ]; then + if [[ $error_level = 2 ]]; then + echo "-> $tID exists. Skipping..." + echo "$filename exists." >> $logfile + fi + else + if [[ "$tDESC" = "{No longer listed at LEGO.com}" ]] ; then + if [[ $error_level = 1 || $error_level = 2 ]]; then + echo "-> $tID is not available. Skipping..." + echo "$filename is not available." >> $logfile + fi + else + echo -ne "--> $tID downloading now..." + curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename" + if [ -f "$downloadFolder/$filename" ]; then + echo "Done! > $filename" + else + echo "ERROR!" + if [[ $error_level = 1 || $error_level = 2 ]]; then + echo "--> Not downloaded. Try again manually..." echo "$filename was not downloaded. Check CURL" >> $logfile fi + #random sleep in order to not look like a script + sleep $(( ( RANDOM % 5 ) + 1 )) fi - #random sleep in order to not look like a script - sleep $(( ( RANDOM % 1 ) + 1 )) fi fi fi