diff --git a/lego_dl.sh b/lego_dl.sh index cc44d9f..c4591cd 100644 --- a/lego_dl.sh +++ b/lego_dl.sh @@ -1,38 +1,70 @@ - #!/bin/bash - ############################## - # - # This scripts uses the brickset instructions file to get links and set numbers. - # In order to get themes, we get themes from rebrickable. - # If a booklet isn't available on lego.com, then brickinstructions.com is tested. - # - # Files are saved as: - # $tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" - # 1190-1_Retro_Buggy_(Town_1999).pdf - # - # If a set contains multiple filesm the file name will be: - # 2520-1_(1_of_2)_Battle_Arena_(Ninjago_2011).pdf - # 2520-1_(2_of_2)_Battle_Arena_(Ninjago_2011).pdf - # - # Req: - # bash, awk, grep, sed, curl, wget - # - # Get themes.csv and sets.cvs from https://rebrickable.com/downloads - # - # error_level: - # 0 no errors reported - # 1 Download errors reported - # 2 Existing files and download errors reported - # - ############################## +#!/bin/bash +############################## +# +# This script uses the brickset instructions file to get links and set numbers. +# In order to get themes, we get themes from rebrickable. +# If a booklet isn't available on lego.com, then brickinstructions.com is tested. 
+# +# Files are saved as: +# $tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" +# 1190-1_Retro_Buggy_(Town_1999).pdf +# +# If a set contains multiple files, the file name will be: +# 2520-1_(1_of_2)_Battle_Arena_(Ninjago_2011).pdf +# 2520-1_(2_of_2)_Battle_Arena_(Ninjago_2011).pdf +# +# Req: +# bash, awk, grep, sed, curl, wget +# +# Get themes.csv and sets.csv from https://rebrickable.com/downloads +# +# error_level: +# 0 no errors reported +# 1 Download errors reported +# 2 Existing files and download errors reported +# +############################## - echo "----> Starting Download of all LEGO instructions from https://brickset.com/exportscripts/instructions" - firstline=0 +echo "----> Starting Download of all LEGO instructions from https://brickset.com/exportscripts/instructions" +firstline=0 + +##### CHANGE HERE ##### +logfile="lego_errors.log" +downloadFolder="../Instructions" +error_level=2 +delay=1 # set to 1 for no delays, set to 0 for random delays of 1-5 seconds. +DEBUG=false # set to true to stop all downloads and just do file checks. +##### STOP CHANGE ##### + + +##### +# +# NOTES: Try and test other sites if LEGO or brickinstructions aren't working. e.g. Peeron or Brickset. +# +##### +#deTe=0 +#firstline=0 +#while IFS=',' read -r ID NAME YEAR THEME_ID NUM_PARTS +#do +# if [ "$firstline" = 0 ]; then +# firstline=1 +# else +# +# if [[ ${ID#0} -gt 1000 && "$YEAR" -gt 1990 && "$NUM_PARTS" -gt 50 ]]; then +# deTe=$((deTe+1)) +# if [ "$deTe" -gt 5 ]; then +# exit +# fi +# echo "$ID, $YEAR, $NAME, $NUM_PARTS" +# awk -F '|' '$1 ~ /"^'$D'"/' instructions.csv +# fi +# fi +#done < sets.csv +#exit +# +# +##### - ##### CHANGE HERE ##### - logfile="lego_errors.log" - downloadFolder="../Instructions" - error_level=1 - ##### STOP CHANGE ##### echo "" > $logfile @@ -48,6 +80,7 @@ echo "Error... Exiting!" exit; else + mv "Brickset-instructions.csv" "instructions" echo "Done!" 
fi fi @@ -68,28 +101,39 @@ fi } - while IFS='|' read -r ID LINK NAME DESC ADDED RETRIVED + while IFS='|' read -r ID LINK DESC ADDED RETRIVED do #$PC=$((200*$CURRENT/$LINES % 2 + 100*$CURRENT/$LINES)) #CURRENT=$((CURRENT + 1)) if [ "$firstline" = 0 ]; then firstline=1 - else + else + + if [ "$DEBUG" = true ]; then + echo "$ID" + echo "$LINK" + #echo "$NAME" + echo "$DESC" + echo "$ADDED" + echo "$RETRIVED" + fi tID=$(sed -e 's/^"//' -e 's/"$//' <<<"$ID") tLINK=$(sed -e 's/^"//' -e 's/"$//' <<<"$LINK") #tNAME=$(sed -e 's/^"//' -e 's/"$//' <<<"$NAME") - ttNAME=$(cut -d, -f1-2 sets.csv | grep -w $tID | cut -d, -f2) - setYEAR=$(grep -w $tID sets.csv | cut -d, -f3) - setNAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $ttNAME) - themeID=$(grep -w $tID sets.csv | cut -d, -f4) - themeName=$(awk -F',' -v id="$themeID" '$1 == id' themes.csv) + ttNAME=$(cut -d, -f1-2 "$SETS" | grep -w "$tID" | cut -d, -f2) + #echo "ttNAME: $ttNAME" + setYEAR=$(grep -w "$tID" "$SETS" | cut -d, -f3) + setNAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< "$ttNAME") + #echo "setNAME: $setNAME" + themeID=$(grep -w "$tID" "$SETS" | cut -d, -f4) + themeName=$(awk -F',' -v id="$themeID" '$1 == id' "$THEMES") IFS=',' read -r -a array <<< "$themeName" tempID=${array[2]} - if [[ $tempID != "" ]]; then + if [[ $tempID != "" ]]; then while [[ $tempID != "" ]] do - tthemeName=$(awk -F',' -v id="$tempID" '$1 == id' themes.csv) + tthemeName=$(awk -F',' -v id="$tempID" '$1 == id' "$THEMES") IFS=',' read -r -a tArray <<< "$tthemeName" tempID=${tArray[2]} themeName=${tArray[1]} @@ -98,24 +142,49 @@ themeName=${array[1]} fi - setTHEME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $themeName) + setTHEME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< "$themeName") tADDED=$(sed -e 's/^"//' -e 's/"$//' <<<"$ADDED") tDESC=$(sed -e 's/^"//' -e 's/"$//' <<<"$DESC") + #echo "DESC: $DESC" - ttDESC=$(echo $tDESC | grep -Eo '[^0-9][0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/[^A-Za-z0-9/]//g' | sed 's/\//_of_/g') + + #echo $tDESC + ttDESC=$(echo "$tDESC" | grep 
-Eo '[^0-9][0-9]{1}\s?\/\s?[0-9]{1,2}(\s|$)' | sed 's/[^A-Za-z0-9/]//g' | sed 's/\//_of_/g') + #echo "TEST"$(echo "$tDESC" | grep -Eo '[^0-9][0-9]{1}\s?\/\s?[0-9]{1,2}(\s|$)') + + #echo "--->ttDESC: $ttDESC" #ttDESC=$(echo $tDESC | grep -Eo '\s[0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/ //g' | sed 's/\//_of_/g') - if [ -z "$ttDESC" ]; then - etDESC="_" + tDESC="_" + if [ "$DEBUG" = true ]; then + echo "ttDESC is empty" + fi else - tDESC="_("$ttDESC")_" + tDESC="_($ttDESC)_" + if [ "$DEBUG" = true ]; then + echo "ttDESC is not empty" + fi fi + #echo "tDESC: $tDESC" + #echo "setDESC: $setDESC" #PC=$(echo $CURRENT $LINES | awk '{print 100*$1/$2}') - tFilename=""$tID"_"$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" + tFilename="$tID$tDESC${setNAME// /_}_($setTHEME""_""$setYEAR).pdf" filename=$tFilename + #echo $filename + + + if [ "$DEBUG" = true ]; then + echo "--->FILENAME: $filename" + #echo "-->================================================<--" + CURRENT=$((CURRENT+1)) + if [ "$CURRENT" -gt 30 ]; then + exit + fi + fi if [ -f "$downloadFolder/$filename" ]; then + #echo "$downloadFolder/$filename" if [[ $error_level = 2 ]]; then echo "-> $tID exists. Skipping..." echo "$filename exists." >> $logfile @@ -123,40 +192,68 @@ else if [[ "$tDESC" = "{No longer listed at LEGO.com}" ]] ; then echo -ne "-> $tID testing links..." - if validate_url $tLINK; then + if validate_url "$tLINK"; then echo -ne "Found on LEGO.com... Downloading..." - curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename" - if [ -f "$downloadFolder/$filename" ]; then - echo "Done! > $filename" - else + + if [ "$DEBUG" != true ]; then + curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L "$tLINK" --silent --output "$downloadFolder/$filename" + fi + if [ $(head -c 4 "$downloadFolder/$filename") = "%PDF" ]; then + if [ -f "$downloadFolder/$filename" ]; then + echo "Done! > $filename" + else + echo "ERROR!" 
+ if [[ $error_level = 1 || $error_level = 2 ]]; then + echo "--> Not downloaded. Try again manually..." + echo "$filename was not downloaded. Check CURL" >> $logfile + fi + fi + else echo "ERROR!" + rm "$downloadFolder/$filename" if [[ $error_level = 1 || $error_level = 2 ]]; then - echo "--> Not downloaded. Try again manually..." + echo "--> File is not a PDF..." echo "$filename was not downloaded. Check CURL" >> $logfile fi fi #random sleep in order to not look like a script - sleep $(( ( RANDOM % 5 ) + 1 )) + if [[ $delay = 0 ]]; then + sleep $(( ( RANDOM % 5 ) + 1 )) + fi else #test - biID=$(sed -e 's/[^0-9_]/_/g' <<< $tID) + biID=$(sed -e 's/[^0-9_]/_/g' <<< "$tID") biLink="https://lego.brickinstructions.com/pdfdrop/" if validate_url "$biLink$biID.pdf"; then echo -ne "Found on BrickInstructions.com... Downloading..." - curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L "$biLink$tID.pdf" --silent --output "$downloadFolder/$filename" - if [ -f "$downloadFolder/$filename" ]; then - echo "Done! > $filename" + + if [ "$DEBUG" != true ]; then + curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L "$biLink$tID.pdf" --silent --output "$downloadFolder/$filename" + fi + if [ $(head -c 4 "$downloadFolder/$filename") = "%PDF" ]; then + if [ -f "$downloadFolder/$filename" ]; then + echo "Done! > $filename" + else + echo "ERROR!" + if [[ $error_level = 1 || $error_level = 2 ]]; then + echo "--> Not downloaded. Try again manually..." + echo "$filename was not downloaded. Check CURL" >> $logfile + fi + fi else - echo "ERROR!" - if [[ $error_level = 1 || $error_level = 2 ]]; then - echo "--> Not downloaded. Try again manually..." + echo "ERROR!" + rm "$downloadFolder/$filename" + if [[ $error_level = 1 || $error_level = 2 ]]; then + echo "--> File is not a PDF..." echo "$filename was not downloaded. 
Check CURL" >> $logfile fi - fi + fi #random sleep in order to not look like a script - sleep $(( ( RANDOM % 5 ) + 1 )) - else + if [[ $delay = 0 ]]; then + sleep $(( ( RANDOM % 5 ) + 1 )) + fi + else if [[ $error_level = 1 || $error_level = 2 ]]; then echo "-> $tID is not available. Skipping..." echo "$filename is not available." >> $logfile @@ -166,20 +263,42 @@ else echo -ne "--> $tID downloading now..." - curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename" - if [ -f "$downloadFolder/$filename" ]; then - echo "Done! > $filename" + + if [ "$DEBUG" != true ]; then + curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L "$tLINK" --silent --output "$downloadFolder/$filename" + fi + if [ $(head -c 4 "$downloadFolder/$filename") = "%PDF" ]; then + if [ -f "$downloadFolder/$filename" ]; then + echo "Done! > $filename" + else + echo "ERROR!" + if [[ $error_level = 1 || $error_level = 2 ]]; then + echo "--> Not downloaded. Try again manually..." + echo "$filename was not downloaded. Check CURL" >> $logfile + fi + #random sleep in order to not look like a script + if [[ $delay = 0 ]]; then + sleep $(( ( RANDOM % 5 ) + 1 )) + fi + fi else - echo "ERROR!" + echo "ERROR!" + rm "$downloadFolder/$filename" if [[ $error_level = 1 || $error_level = 2 ]]; then - echo "--> Not downloaded. Try again manually..." + echo "--> File is not a PDF..." echo "$filename was not downloaded. Check CURL" >> $logfile fi - #random sleep in order to not look like a script - sleep $(( ( RANDOM % 5 ) + 1 )) fi fi fi fi done < instructions.csv + + + + + + + +