#!/bin/bash
#
# Download every LEGO building-instruction PDF listed in Brickset's
# "instructions" export and store each one under a descriptive file name:
#
#   <set id>_[(<part>_of_<total>)_]<set name>_(<root theme>_<year>).pdf
#
# Requirements (looked up in the current working directory):
#   themes.csv, sets.csv - get them from https://rebrickable.com/downloads
#
# Files written:
#   instructions      raw Brickset export (reused when it already exists)
#   instructions.csv  the export converted to '|'-separated fields
#   lego_errors.log   log file, see error_level below
#   ../Instructions/  the downloaded PDFs
#
# Known limitation: both CSV inputs are split naively on ','. A quoted field
# that itself contains a comma shifts the following columns.

echo "----> Starting Download of all LEGO instructions from https://brickset.com/exportscripts/instructions"

logfile="lego_errors.log"
downloadFolder="../Instructions"
exportURL="https://brickset.com/exportscripts/instructions"
userAgent="Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0"

#error_level:
# 0 no errors reported
# 1 Download errors reported
# 2 Existing files and download errors reported
error_level=0

THEMES="themes.csv"
SETS="sets.csv"

# Matches a " <part>/<total>" marker inside a description, e.g. " 1/2" or
# " 2 / 10". Capture group 1 is the part number, group 2 the total.
partPattern='[[:space:]]([0-9])[[:space:]]*/[[:space:]]*([0-9]{1,2})'

# Fail fast when the lookup tables are missing; without them every file name
# would lose its set name, theme and year.
for required in "$SETS" "$THEMES"; do
  if [[ ! -f "$required" ]]; then
    echo "Error: $required not found. Get it from https://rebrickable.com/downloads" >&2
    exit 1
  fi
done

# Start every run with a fresh log file.
echo "" > "$logfile"

if [[ -f "instructions" ]]; then
  echo "---> Using existing instructions file. Manually delete it to redownload..."
else
  echo -ne "---> Downloading csv from Brickset..."
  if wget "$exportURL" &> /dev/null; then
    echo "Done!"
  else
    echo "Error... Exiting!"
    # A partial export would be picked up as "existing" on the next run.
    rm -f -- instructions
    exit 1
  fi
fi

echo "----> Converting csv with ',' to '|'..."
# The header line contains no quoted commas, so a plain substitution is enough.
head -n 1 instructions | sed 's/,/|/g' > instructions.csv
# Data lines: temporarily hide a comma that sits inside a quoted field behind
# '###', let awk split the first six fields on ',', restore the hidden comma
# and finally turn the '" "' field boundaries printed by awk into '"|"'.
sed 1,1d instructions \
  | sed -r 's/("[^",]+),([^",]+")/\1###\2/g' \
  | awk -F, '{print $1,$2,$3,$4,$5,$6}' \
  | sed 's/###/,/g' \
  | sed 's/" "/"|"/g' >> instructions.csv

# curl does not create the output directory on its own.
mkdir -p -- "$downloadFolder" || exit 1

{
  # Discard the header line.
  read -r _

  # Only the 1st (set id), 2nd (link) and 4th (description) fields are used.
  while IFS='|' read -r ID LINK _ DESC _ _; do
    # Strip one surrounding pair of double quotes from each used field.
    tID=${ID#\"}
    tID=${tID%\"}
    tLINK=${LINK#\"}
    tLINK=${tLINK%\"}
    tDESC=${DESC#\"}
    tDESC=${tDESC%\"}

    # A row without a set id cannot be looked up or named.
    [[ -n "$tID" ]] || continue

    # Set name: 2nd field of the first sets.csv row whose first two fields
    # contain the id as a whole word. -F keeps the id from being read as a
    # regex, -m 1 keeps an ambiguous match from yielding a multi-line name.
    ttNAME=$(cut -d, -f1-2 "$SETS" | grep -m 1 -w -F -- "$tID" | cut -d, -f2)
    setNAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<<"$ttNAME")

    # Year (3rd field) and theme id (4th field) of the first matching row.
    IFS=',' read -r _ _ setYEAR themeID _ \
      <<<"$(grep -m 1 -w -F -- "$tID" "$SETS")"

    # themes.csv rows are read as: id, name, parent id. Follow the parent ids
    # upwards until a row without a parent is reached and use that row's name.
    themeRow=$(awk -F',' -v id="$themeID" '$1 == id' "$THEMES")
    IFS=',' read -r -a themeFields <<<"$themeRow"
    themeName=${themeFields[1]}
    parentID=${themeFields[2]}
    while [[ -n "$parentID" ]]; do
      themeRow=$(awk -F',' -v id="$parentID" '$1 == id' "$THEMES")
      IFS=',' read -r -a themeFields <<<"$themeRow"
      themeName=${themeFields[1]}
      parentID=${themeFields[2]}
    done
    setTHEME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<<"$themeName")

    # Turn a " 1/2" style marker in the description into "_(1_of_2)_".
    if [[ "$tDESC" =~ $partPattern ]]; then
      setDESC="_(${BASH_REMATCH[1]}_of_${BASH_REMATCH[2]})_"
    else
      setDESC="_"
    fi

    filename="${tID}${setDESC}${setNAME}_(${setTHEME}_${setYEAR}).pdf"
    target="$downloadFolder/$filename"

    if [[ -f "$target" ]]; then
      echo "-> $tID exists. Skipping..."
      if [[ $error_level = 2 ]]; then
        echo "$filename exists." >> "$logfile"
      fi
    elif [[ "$tDESC" = "{No longer listed at LEGO.com}" ]]; then
      echo "-> $tID is not available. Skipping..."
      if [[ $error_level = 1 || $error_level = 2 ]]; then
        echo "$filename is not available." >> "$logfile"
      fi
    else
      echo -ne "--> $tID downloading now..."
      # --fail makes curl return non-zero on an HTTP error instead of saving
      # the error page as a PDF; --user-agent sends the browser string as a
      # real User-Agent header.
      if curl --fail --silent --location --user-agent "$userAgent" \
           --output "$target" --url "$tLINK" && [[ -f "$target" ]]; then
        echo "Done! > $filename"
      else
        # Remove a partial file so the next run retries instead of skipping.
        rm -f -- "$target"
        echo "ERROR!"
        if [[ $error_level = 1 || $error_level = 2 ]]; then
          echo
          echo "--> Not downloaded. Try again manually..."
          echo "$filename was not downloaded. Check CURL" >> "$logfile"
        fi
      fi
      #random sleep in order to not look like a script
      sleep $(( ( RANDOM % 5 ) + 1 ))
    fi
  done
} < instructions.csv