#!/bin/bash
#
# Downloads the LEGO building instructions listed in the CSV export published
# at https://brickset.com/exportscripts/instructions.
#
# Prerequisite: get themes.csv and sets.csv from
# https://rebrickable.com/downloads and place them next to this script.

echo "----> Starting Download of all LEGO instructions from https://brickset.com/exportscripts/instructions"

# Header-skip flag for the CSV row loop: 0 until the first line has been seen.
firstline=0
# File that collects the messages selected by error_level.
logfile="lego_errors.log"
# Directory the instruction PDFs are stored in.
downloadFolder="../Instructions"

#error_level:
# 0 no errors reported
# 1 Download errors reported
# 2 Existing files and download errors reported
error_level=0

# Start every run with a fresh log (a single empty line, as before).
echo "" > "$logfile"

# Lookup tables obtained from Rebrickable (see the header comment).
THEMES="themes.csv"
SETS="sets.csv"

if [[ -f "instructions" ]]; then
  echo "---> Using existing instructions file. Manually delete it to redownload..."
else
  echo -ne "---> Downloading csv from Brickset..."
  # Download to a scratch name first so that a failed or interrupted transfer
  # never leaves a truncated "instructions" file that the next run would reuse.
  if wget -O instructions.part https://brickset.com/exportscripts/instructions > /dev/null 2>&1 \
    && mv -f instructions.part instructions; then
    echo "Done!"
  else
    rm -f instructions.part
    echo "Error... Exiting!"
    # Signal the failure to the caller; a bare `exit` here would return 0.
    exit 1
  fi
fi
#######################################
# Convert a comma-separated export into the '|'-separated form read by the
# download loop.
#
# The header line only has its commas replaced by '|'. For every other line
# the commas that sit inside a double-quoted field are protected, the first
# six columns are kept, and the '" "' gaps between them become '"|"'.
# All fields are expected to be double-quoted; unquoted fields are not
# separated (this matches the previous behaviour).
#
# Arguments: $1 - source CSV, $2 - destination file (always truncated)
# Returns:   0 on success, 1 when the source cannot be read
#######################################
convert_instructions_csv() {
  local src=$1 dest=$2

  if [[ ! -r "$src" ]]; then
    # Still truncate the destination so stale rows are never processed.
    : > "$dest"
    echo "Cannot read '$src'; nothing to convert." >&2
    return 1
  fi

  {
    head -n 1 "$src" | sed 's/,/|/g'
    # The sed loop rewrites, one at a time, every comma that is preceded by an
    # odd number of double quotes (i.e. a comma inside a quoted field) to the
    # placeholder '###', so fields with any number of embedded commas survive
    # the split on ',' done by awk. The placeholder is turned back afterwards.
    tail -n +2 "$src" \
      | sed -E -e ':a' -e 's/^([^"]*("[^"]*"[^"]*)*"[^"]*),/\1###/' -e 'ta' \
      | awk -F, '{print $1,$2,$3,$4,$5,$6}' \
      | sed -e 's/###/,/g' -e 's/" "/"|"/g'
  } > "$dest"
}

echo "----> Converting csv with ',' to '|'..."
convert_instructions_csv instructions instructions.csv
# User agent sent with every PDF request.
user_agent="Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0"

# Print $1 without one leading and one trailing double quote.
strip_quotes() {
  local value=$1
  value=${value#\"}
  value=${value%\"}
  printf '%s\n' "$value"
}

# Print $1 with every character outside [A-Za-z0-9._-] replaced by "_".
sanitize_for_filename() {
  printf '%s\n' "${1//[^A-Za-z0-9._-]/_}"
}

# Print the file-name infix derived from the notes text in $1:
# "_(N_of_M)_" when the text contains " N/M" (single-digit N, one- or
# two-digit M, optional blanks around the slash), otherwise just "_".
part_suffix() {
  local notes=$1
  local part_re='[[:space:]]([0-9])[[:space:]]?/[[:space:]]?([0-9]{1,2})'
  if [[ $notes =~ $part_re ]]; then
    printf '_(%s_of_%s)_\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}"
  else
    printf '_\n'
  fi
}

# Print the row of the sets table whose first column is the set id $1.
# The id is compared against column 1 only, so an id that merely shows up in
# a name or year no longer selects the wrong row. When there is no exact
# match, the first row whose id starts with "<id>-" is used instead.
# Prints nothing when the set is unknown.
find_set_row() {
  local set_id=$1
  [[ -n "$set_id" ]] || return 0
  awk -F, -v id="$set_id" '
    $1 == id { print; hit = 1; exit }
    fallback == "" && index($1, id "-") == 1 { fallback = $0 }
    END { if (!hit && fallback != "") print fallback }
  ' "${SETS:-sets.csv}"
}

# Print the name of the top-most ancestor of theme id $1. The themes table is
# read as: column 1 = id, column 2 = name, column 3 = parent id (empty for a
# root theme). The walk stops at a missing parent (keeping the last name
# found) and after 32 hops, so a broken table cannot hang the script.
root_theme_name() {
  local theme_id=$1 row name='' hops=0
  local -a cols=()
  while [[ -n "$theme_id" ]] && (( hops < 32 )); do
    row=$(awk -F, -v id="$theme_id" '$1 == id { print; exit }' "${THEMES:-themes.csv}")
    [[ -n "$row" ]] || break
    row=${row%$'\r'}
    IFS=, read -r -a cols <<< "$row"
    name=${cols[1]:-}
    theme_id=${cols[2]:-}
    hops=$(( hops + 1 ))
  done
  printf '%s\n' "$name"
}

# Print the target file name for set id $1 with notes text $2:
#   <id><part suffix><set name>_(<root theme>_<year>).pdf
# Name, year and theme id come from columns 2, 3 and 4 of the sets table.
build_filename() {
  local set_id=$1 notes=$2 set_row set_name set_theme
  local -a set_cols=()
  set_row=$(find_set_row "$set_id")
  set_row=${set_row%$'\r'}
  IFS=, read -r -a set_cols <<< "$set_row"
  set_name=$(sanitize_for_filename "${set_cols[1]:-}")
  set_theme=$(sanitize_for_filename "$(root_theme_name "${set_cols[3]:-}")")
  printf '%s%s%s_(%s_%s).pdf\n' \
    "$set_id" "$(part_suffix "$notes")" "$set_name" "$set_theme" "${set_cols[2]:-}"
}

# Download URL $1 to file $2. Returns 0 only when the transfer succeeded.
# --fail makes HTTP errors count as failures instead of saving the error page
# as a PDF, and the data goes to "<file>.part" first so that an aborted
# transfer is never mistaken for a finished download on the next run.
fetch_pdf() {
  local url=$1 target=$2
  local partial="${target}.part"
  if curl --user-agent "$user_agent" --location --fail --silent \
    --create-dirs --output "$partial" --url "$url" \
    && mv -f -- "$partial" "$target"; then
    return 0
  fi
  rm -f -- "$partial"
  return 1
}

# Append message $1 to the log file.
log_issue() {
  printf '%s\n' "$1" >> "${logfile:-lego_errors.log}"
}

# Handle one data row of instructions.csv.
# Arguments: $1 - set id, $2 - PDF URL, $3 - notes text (all still quoted)
# Globals:   downloadFolder, error_level, logfile (read)
process_row() {
  local set_id url notes filename target
  local level=${error_level:-0}
  set_id=$(strip_quotes "$1")
  url=$(strip_quotes "$2")
  notes=$(strip_quotes "$3")
  filename=$(build_filename "$set_id" "$notes")
  # Refuse to run without a download folder instead of writing to "/".
  target="${downloadFolder:?downloadFolder must be set}/$filename"

  if [[ -f "$target" ]]; then
    echo " -> $set_id exists. Skipping... "
    if [[ "$level" == 2 ]]; then
      log_issue " $filename exists. "
    fi
    return 0
  fi

  if [[ "$notes" == "{No longer listed at LEGO.com}" ]]; then
    echo " -> $set_id is not available. Skipping... "
    if [[ "$level" == 1 || "$level" == 2 ]]; then
      log_issue " $filename is not available. "
    fi
    return 0
  fi

  printf '%s' " --> $set_id downloading now... "
  if fetch_pdf "$url" "$target"; then
    echo " Done! > $filename "
  else
    echo "ERROR!"
    if [[ "$level" == 1 || "$level" == 2 ]]; then
      echo
      echo "--> Not downloaded. Try again manually..."
      log_issue " $filename was not downloaded. Check CURL "
    fi
  fi

  #random sleep in order to not look like a script
  sleep $(( (RANDOM % 5) + 1 ))
}

# Walk through the converted CSV. Columns 1, 2 and 4 (set id, URL, notes) are
# used; the first line is the header and is skipped via the firstline flag.
while IFS='|' read -r row_id row_link _ row_notes _; do
  if [[ "${firstline:-0}" == 0 ]]; then
    firstline=1
    continue
  fi
  # Keep the row handler away from the loop's stdin (the CSV itself).
  process_row "$row_id" "$row_link" "$row_notes" < /dev/null
done < instructions.csv