From 68ddb6bf862d4f78de861736ac41d0e584315ea3 Mon Sep 17 00:00:00 2001 From: Frederik Baerentsen Date: Thu, 25 Jun 2020 18:14:25 +0200 Subject: [PATCH] Added test to download from brickinstructions if lego.com is unavailable --- lego_dl.sh | 181 +++++++++++++++++++++++++++-------------------------- 1 file changed, 91 insertions(+), 90 deletions(-) diff --git a/lego_dl.sh b/lego_dl.sh index e39701f..a6ebfe3 100644 --- a/lego_dl.sh +++ b/lego_dl.sh @@ -66,7 +66,7 @@ return 0 else return 1 - fi + fi } while IFS='|' read -r ID LINK NAME DESC ADDED RETRIVED @@ -74,106 +74,108 @@ #$PC=$((200*$CURRENT/$LINES % 2 + 100*$CURRENT/$LINES)) #CURRENT=$((CURRENT + 1)) - if [ "$firstline" = 0 ]; then - firstline=1 - else - tID=$(sed -e 's/^"//' -e 's/"$//' <<<"$ID") - tLINK=$(sed -e 's/^"//' -e 's/"$//' <<<"$LINK") - #tNAME=$(sed -e 's/^"//' -e 's/"$//' <<<"$NAME") - ttNAME=$(cut -d, -f1-2 sets.csv | grep -w $tID | cut -d, -f2) - setYEAR=$(grep -w $tID sets.csv | cut -d, -f3) - setNAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $ttNAME) - themeID=$(grep -w $tID sets.csv | cut -d, -f4) - themeName=$(awk -F',' -v id="$themeID" '$1 == id' themes.csv) - IFS=',' read -r -a array <<< "$themeName" - tempID=${array[2]} - if [[ $tempID != "" ]]; then - while [[ $tempID != "" ]] - do - tthemeName=$(awk -F',' -v id="$tempID" '$1 == id' themes.csv) - IFS=',' read -r -a tArray <<< "$tthemeName" - tempID=${tArray[2]} - themeName=${tArray[1]} - done - else - themeName=${array[1]} - fi + if [ "$firstline" = 0 ]; then + firstline=1 + else + tID=$(sed -e 's/^"//' -e 's/"$//' <<<"$ID") + tLINK=$(sed -e 's/^"//' -e 's/"$//' <<<"$LINK") + #tNAME=$(sed -e 's/^"//' -e 's/"$//' <<<"$NAME") + ttNAME=$(cut -d, -f1-2 sets.csv | grep -w $tID | cut -d, -f2) + setYEAR=$(grep -w $tID sets.csv | cut -d, -f3) + setNAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $ttNAME) + themeID=$(grep -w $tID sets.csv | cut -d, -f4) + themeName=$(awk -F',' -v id="$themeID" '$1 == id' themes.csv) + IFS=',' read -r -a array <<< "$themeName" + tempID=${array[2]} + if [[ $tempID != "" ]]; then + while [[ $tempID != "" ]] + do + tthemeName=$(awk -F',' -v id="$tempID" '$1 == id' themes.csv) + IFS=',' read -r -a tArray <<< "$tthemeName" + tempID=${tArray[2]} + themeName=${tArray[1]} + done + else + themeName=${array[1]} + fi - setTHEME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $themeName) - - tADDED=$(sed -e 's/^"//' -e 's/"$//' <<<"$ADDED") - tDESC=$(sed -e 's/^"//' -e 's/"$//' <<<"$DESC") - - ttDESC=$(echo $tDESC | grep -Eo '[^0-9][0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/[^A-Za-z0-9/]//g' | sed 's/\//_of_/g') - #ttDESC=$(echo $tDESC | grep -Eo '\s[0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/ //g' | sed 's/\//_of_/g') + setTHEME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $themeName) - if [ -z "$ttDESC" ]; then - setDESC="_" - else - setDESC="_("$ttDESC")_" - fi - #PC=$(echo $CURRENT $LINES | awk '{print 100*$1/$2}') - tFilename=""$tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" - filename=$tFilename + tADDED=$(sed -e 's/^"//' -e 's/"$//' <<<"$ADDED") + tDESC=$(sed -e 's/^"//' -e 's/"$//' <<<"$DESC") - if [ -f "$downloadFolder/$filename" ]; then - if [[ $error_level = 2 ]]; then - echo "-> $tID exists. Skipping..." - echo "$filename exists." >> $logfile - fi - else - if [[ "$tDESC" = "{No longer listed at LEGO.com}" ]] ; then - echo -ne "-> $tID testing link." + ttDESC=$(echo $tDESC | grep -Eo '[^0-9][0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/[^A-Za-z0-9/]//g' | sed 's/\//_of_/g') + #ttDESC=$(echo $tDESC | grep -Eo '\s[0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/ //g' | sed 's/\//_of_/g') - if validate_url $tLINK; then - echo -ne "Found... Downloading..." - - curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename" - if [ -f "$downloadFolder/$filename" ]; then - echo "Done! > $filename" - else - echo "ERROR!" - if [[ $error_level = 1 || $error_level = 2 ]]; then - echo "--> Not downloaded. Try again manually..." + if [ -z "$ttDESC" ]; then + etDESC="_" + else + tDESC="_("$ttDESC")_" + fi + #PC=$(echo $CURRENT $LINES | awk '{print 100*$1/$2}') + tFilename=""$tID"_"$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" + filename=$tFilename + + if [ -f "$downloadFolder/$filename" ]; then + if [[ $error_level = 2 ]]; then + echo "-> $tID exists. Skipping..." + echo "$filename exists." >> $logfile + fi + else + if [[ "$tDESC" = "{No longer listed at LEGO.com}" ]] ; then + echo -ne "-> $tID testing link." + echo -ne "LEGO.com <$tLINK>..." + if validate_url $tLINK; then + echo -ne "Found on lego.com... Downloading..." + curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename" + if [ -f "$downloadFolder/$filename" ]; then + echo "Done! > $filename" + else + echo "ERROR!" + if [[ $error_level = 1 || $error_level = 2 ]]; then + echo "--> Not downloaded. Try again manually..." + echo "$filename was not downloaded. Check CURL" >> $logfile + fi + fi + #random sleep in order to not look like a script + sleep $(( ( RANDOM % 5 ) + 1 )) + else + #test + + biID=$(sed -e 's/[^0-9_]/_/g' <<< $tID) + biLink="https://lego.brickinstructions.com/pdfdrop/" + echo -ne "BrickInstructions <$biLink$biID.pdf>..." + if validate_url "$biLink$biID.pdf"; then + echo -ne "Found on BrickInstructions... Downloading..." + curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L "$biLink$tID.pdf" --silent --output "$downloadFolder/$filename" + if [ -f "$downloadFolder/$filename" ]; then + echo "Done! > $filename" + else + echo "ERROR!" + if [[ $error_level = 1 || $error_level = 2 ]]; then + echo "--> Not downloaded. Try again manually..." echo "$filename was not downloaded. Check CURL" >> $logfile fi fi #random sleep in order to not look like a script sleep $(( ( RANDOM % 5 ) + 1 )) else - #test - biLink="https://lego.brickinstructions.com/pdfdrop/" - if validate_url "$biLink$tID.pdf"; then - echo -ne "Found on BrickInstructions... Downloading..." - curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L "$biLink$tID.pdf" --silent --output "$downloadFolder/$filename" - if [ -f "$downloadFolder/$filename" ]; then - echo "Done! > $filename" - else - echo "ERROR!" - if [[ $error_level = 1 || $error_level = 2 ]]; then - echo "--> Not downloaded. Try again manually..." - echo "$filename was not downloaded. Check CURL" >> $logfile + if [[ $error_level = 1 || $error_level = 2 ]]; then + echo "-> $tID is not available. Skipping..." + echo "$filename is not available." >> $logfile fi - fi - #random sleep in order to not look like a script - sleep $(( ( RANDOM % 5 ) + 1 )) - fi - fi + fi + fi + + else + echo -ne "--> $tID downloading now..." + curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename" + if [ -f "$downloadFolder/$filename" ]; then + echo "Done! > $filename" else - if [[ $error_level = 1 || $error_level = 2 ]]; then - echo "-> $tID is not available. Skipping..." - echo "$filename is not available." >> $logfile - fi - fi - else - echo -ne "--> $tID downloading now..." - curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename" - if [ -f "$downloadFolder/$filename" ]; then - echo "Done! > $filename" - else - echo "ERROR!" - if [[ $error_level = 1 || $error_level = 2 ]]; then - echo "--> Not downloaded. Try again manually..." + echo "ERROR!" + if [[ $error_level = 1 || $error_level = 2 ]]; then + echo "--> Not downloaded. Try again manually..." echo "$filename was not downloaded. Check CURL" >> $logfile fi #random sleep in order to not look like a script @@ -182,6 +184,5 @@ fi fi fi - done < instructions.csv