DataHoarder_scripts/lego_dl.sh

133 lines
3.9 KiB
Bash
Raw Normal View History

2020-06-23 15:02:27 +02:00
#!/bin/bash
2020-06-24 08:51:26 +02:00
##############################
#
# This script uses the Brickset instructions file to get links and set numbers.
# In order to get themes, we get themes from rebrickable.
#
# Files are saved as:
# $tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf"
# 1190-1_Retro_Buggy_(Town_1999).pdf
#
# If a set contains multiple files, the file names will be:
2020-06-24 08:57:02 +02:00
# 2520-1_(1_of_2)_Battle_Arena_(Ninjago_2011).pdf
# 2520-1_(2_of_2)_Battle_Arena_(Ninjago_2011).pdf
2020-06-24 08:51:26 +02:00
#
# Req:
# bash, awk, grep, sed, curl, wget
#
2020-06-24 08:41:55 +02:00
# Get themes.csv and sets.csv from https://rebrickable.com/downloads
2020-06-24 08:51:26 +02:00
#
# error_level:
# 0 no errors reported
# 1 Download errors reported
# 2 Existing files and download errors reported
#
##############################
2020-06-24 08:41:55 +02:00
2020-06-23 15:02:27 +02:00
echo "----> Starting Download of all LEGO instructions from https://brickset.com/exportscripts/instructions"

# Header-row flag for the main loop: stays 0 until the first row of
# instructions.csv (the column header) has been skipped.
firstline=0

##### CHANGE HERE #####
# File that collects the messages selected by error_level.
logfile="lego_errors.log"
# Directory the downloaded PDFs are written to (relative to the working dir).
downloadFolder="../Instructions"
# 0 = log nothing, 1 = log unavailable/failed downloads,
# 2 = additionally log files that already exist.
error_level=1
##### STOP CHANGE #####

# Start every run with a fresh log (a single empty line).
echo "" > "$logfile"

# Rebrickable exports (https://rebrickable.com/downloads).
THEMES="themes.csv"
SETS="sets.csv"
2020-06-23 15:02:27 +02:00
# Fetch the Brickset instructions export unless a local copy named
# "instructions" already exists in the working directory.
# On a failed download the script stops with exit status 1.
if [[ -f "instructions" ]]; then
  echo "---> Using existing instructions file. Manually delete it to redownload..."
else
  echo -ne "---> Downloading csv from Brickset..."
  # wget output is discarded on purpose; only its exit status is used.
  if wget https://brickset.com/exportscripts/instructions &> /dev/null; then
    echo "Done!"
  else
    echo "Error... Exiting!"
    # Remove any partial file so the next run downloads again instead of
    # treating the leftover as a valid export.
    rm -f -- instructions
    exit 1
  fi
fi
2020-06-24 08:41:55 +02:00
# Build instructions.csv: a '|'-separated copy of the downloaded "instructions"
# file, which the main loop reads with IFS='|'.
echo "----> Converting csv with ',' to '|'..."
# Header row: every ',' is replaced by '|' unconditionally; this also
# (re)creates instructions.csv.
head -1 instructions | sed 's/,/|/g' > instructions.csv
# Data rows, stage by stage:
#   1. sed 1,1d         drop the header row (already written above).
#   2. sed -r ...       inside a quoted value, replace a ',' with the placeholder
#                       '###' so stage 3 does not split on it. The pattern needs
#                       quote-free, comma-free text on both sides of the comma,
#                       so it appears to protect only values that contain a
#                       single comma - TODO confirm against real data.
#   3. awk -F,          keep the first six comma-separated fields, printed with
#                       a single space between them.
#   4. sed s/###/,/g    put the protected commas back.
#   5. sed s/" "/"|"/g  turn the '" "' boundary between two quoted fields into
#                       '"|"'. NOTE(review): this presumes every field is
#                       quoted and no value itself contains '" "'.
sed 1,1d instructions | sed -r 's/("[^",]+),([^",]+")/\1###\2/g' | awk -F, '{print $1,$2,$3,$4,$5,$6}' | sed 's/###/,/g' | sed 's/" "/"|"/g' >> instructions.csv
# ---------------------------------------------------------------------------
# Main loop: walk instructions.csv ('|'-separated, header row first) and
# download every instruction PDF that is not present in $downloadFolder yet.
#
# Reads:  firstline, downloadFolder, logfile, error_level, SETS, THEMES
#         (SETS/THEMES fall back to sets.csv/themes.csv when unset),
#         max_sleep (optional, upper bound in seconds for the pause, default 3)
# Writes: PDFs below $downloadFolder, messages appended to $logfile
# ---------------------------------------------------------------------------

# Print $1 without one leading and one trailing double quote.
strip_quotes() {
  local value=$1
  value=${value#\"}
  value=${value%\"}
  printf '%s' "$value"
}

# Print $1 with every character outside [A-Za-z0-9._-] replaced by '_'.
sanitize_name() {
  sed -e 's/[^A-Za-z0-9._-]/_/g' <<< "$1"
}

# Print "N_of_M" when description $1 holds a part marker such as " 1/2" or
# " 1 / 12" (whitespace, one digit, '/', one or two digits); print nothing
# otherwise.
# NOTE(review): the part number is limited to a single digit, so markers like
# " 10/12" are not recognised - confirm whether such descriptions exist.
part_marker() {
  local marker_re='[[:space:]]([0-9])[[:space:]]*/[[:space:]]*([0-9]{1,2})'
  if [[ $1 =~ $marker_re ]]; then
    printf '%s' "${BASH_REMATCH[1]}_of_${BASH_REMATCH[2]}"
  fi
}

# Print the first row of CSV file $2 whose first field equals set number $1.
# The comparison is forced to be a string comparison and is anchored to the
# first field, so an ID mentioned inside another set's name does not match.
lookup_set_row() {
  awk -F',' -v id="$1" '($1 "") == (id "") { print; exit }' "$2"
}

# Print the name of the top-level theme for theme id $1, using CSV file $2
# with the columns id,name,parent_id. Parent links are followed until a row
# without parent is reached. If a parent row is missing, the last name found
# is kept. The walk is capped at 32 steps so a cyclic file cannot hang the
# script.
root_theme_name() {
  local id=$1 themes_file=$2 row name="" hops
  local -a fields
  for (( hops = 0; hops < 32; hops++ )); do
    [[ -n "$id" ]] || break
    row=$(awk -F',' -v id="$id" '$1 == id' "$themes_file")
    row=${row%%$'\n'*}   # first matching row only
    row=${row%$'\r'}     # tolerate CRLF line endings
    [[ -n "$row" ]] || break
    IFS=',' read -r -a fields <<< "$row"
    name=${fields[1]}
    id=${fields[2]}
  done
  printf '%s' "$name"
}

# Print the target file name.
# Arguments: $1 set id, $2 part marker (may be empty), $3 set name,
#            $4 theme name, $5 year
# Example:   2520-1_(1_of_2)_Battle_Arena_(Ninjago_2011).pdf
build_filename() {
  local set_id=$1 marker=$2 set_year=$5
  local name theme
  name=$(sanitize_name "$3")
  theme=$(sanitize_name "$4")
  if [[ -n "$marker" ]]; then
    marker="_(${marker})_"
  else
    marker="_"
  fi
  printf '%s' "${set_id}${marker}${name}_(${theme}_${set_year}).pdf"
}

sets_file=${SETS:-sets.csv}
themes_file=${THEMES:-themes.csv}
user_agent="Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0"
max_sleep=${max_sleep:-3}
(( max_sleep >= 1 )) || max_sleep=1

while IFS='|' read -r ID LINK NAME DESC ADDED RETRIVED; do
  # The first row of instructions.csv is the column header.
  if [[ "$firstline" = 0 ]]; then
    firstline=1
    continue
  fi

  tID=$(strip_quotes "$ID")
  tLINK=$(strip_quotes "$LINK")
  tDESC=$(strip_quotes "$DESC")

  # One lookup per set; columns used: set_num,name,year,theme_id.
  # NOTE(review): names containing ',' shift the later columns, because the
  # row is split on every comma.
  IFS=',' read -r _setnum ttNAME setYEAR themeID _rest \
    <<< "$(lookup_set_row "$tID" "$sets_file")"

  themeName=$(root_theme_name "$themeID" "$themes_file")
  filename=$(build_filename "$tID" "$(part_marker "$tDESC")" \
    "$ttNAME" "$themeName" "$setYEAR")
  target="$downloadFolder/$filename"

  if [[ -f "$target" ]]; then
    echo "-> $tID exists. Skipping..."
    if [[ $error_level = 2 ]]; then
      echo "$filename exists." >> "$logfile"
    fi
    continue
  fi

  if [[ "$tDESC" = "{No longer listed at LEGO.com}" ]]; then
    echo "-> $tID is not available. Skipping..."
    if [[ $error_level = 1 || $error_level = 2 ]]; then
      echo "$filename is not available." >> "$logfile"
    fi
    continue
  fi

  echo -ne "--> $tID downloading now..."
  # --fail: an HTTP error must not be stored as a .pdf and reported as done.
  # --user-agent: send the browser string as User-Agent, not as a raw header.
  if curl --fail --silent --location --create-dirs \
       --user-agent "$user_agent" \
       --output "$target" --url "$tLINK" \
     && [[ -f "$target" ]]; then
    echo "Done! > $filename"
  else
    # Drop a partial file so the next run retries this set.
    rm -f -- "$target"
    echo "ERROR!"
    if [[ $error_level = 1 || $error_level = 2 ]]; then
      echo
      echo "--> Not downloaded. Try again manually..."
      echo "$filename was not downloaded. Check CURL" >> "$logfile"
    fi
  fi

  # Pause 1..max_sleep seconds between downloads so requests are not sent
  # at a fixed rhythm.
  sleep $(( RANDOM % max_sleep + 1 ))
done < instructions.csv
2020-06-23 15:02:27 +02:00