Fixed errors
This commit is contained in:
parent
a848096baa
commit
ec88a6914f
235
lego_dl.sh
235
lego_dl.sh
@ -1,131 +1,138 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
##############################
|
##############################
|
||||||
#
|
#
|
||||||
# NOTE: There are still some small errors with sets that have multiple booklets
|
# NOTE: There are still some small errors with sets that have multiple booklets
|
||||||
#
|
#
|
||||||
# This script uses the Brickset instructions file to get links and set numbers.
|
# This script uses the Brickset instructions file to get links and set numbers.
|
||||||
# In order to get themes, we get themes from rebrickable.
|
# In order to get themes, we get themes from rebrickable.
|
||||||
#
|
#
|
||||||
# Files are saved as:
|
# Files are saved as:
|
||||||
# $tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf"
|
# $tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf"
|
||||||
# 1190-1_Retro_Buggy_(Town_1999).pdf
|
# 1190-1_Retro_Buggy_(Town_1999).pdf
|
||||||
#
|
#
|
||||||
# If a set contains multiple files, the file names will be:
|
# If a set contains multiple files, the file names will be:
|
||||||
# 2520-1_(1_of_2)_Battle_Arena_(Ninjago_2011).pdf
|
# 2520-1_(1_of_2)_Battle_Arena_(Ninjago_2011).pdf
|
||||||
# 2520-1_(2_of_2)_Battle_Arena_(Ninjago_2011).pdf
|
# 2520-1_(2_of_2)_Battle_Arena_(Ninjago_2011).pdf
|
||||||
#
|
#
|
||||||
# Req:
|
# Req:
|
||||||
# bash, awk, grep, sed, curl, wget
|
# bash, awk, grep, sed, curl, wget
|
||||||
#
|
#
|
||||||
# Get themes.csv and sets.csv from https://rebrickable.com/downloads
|
# Get themes.csv and sets.csv from https://rebrickable.com/downloads
|
||||||
#
|
#
|
||||||
# error_level:
|
# error_level:
|
||||||
# 0 no errors reported
|
# 0 no errors reported
|
||||||
# 1 Download errors reported
|
# 1 Download errors reported
|
||||||
# 2 Existing files and download errors reported
|
# 2 Existing files and download errors reported
|
||||||
#
|
#
|
||||||
##############################
|
##############################
|
||||||
|
|
||||||
echo "----> Starting Download of all LEGO instructions from https://brickset.com/exportscripts/instructions"
|
echo "----> Starting Download of all LEGO instructions from https://brickset.com/exportscripts/instructions"
|
||||||
firstline=0
|
firstline=0
|
||||||
|
|
||||||
##### CHANGE HERE #####
|
##### CHANGE HERE #####
|
||||||
logfile="lego_errors.log"
|
logfile="lego_errors.log"
|
||||||
downloadFolder="../Instructions"
|
downloadFolder="../Instructions"
|
||||||
error_level=1
|
error_level=1
|
||||||
##### STOP CHANGE #####
|
##### STOP CHANGE #####
|
||||||
|
|
||||||
echo "" > $logfile
|
echo "" > $logfile
|
||||||
|
|
||||||
THEMES="themes.csv"
|
THEMES="themes.csv"
|
||||||
SETS="sets.csv"
|
SETS="sets.csv"
|
||||||
|
|
||||||
if [ -f "instructions" ]; then
|
if [ -f "instructions" ]; then
|
||||||
echo "---> Using existing instructions file. Manually delete it to redownload..."
|
echo "---> Using existing instructions file. Manually delete it to redownload..."
|
||||||
else
|
else
|
||||||
echo -ne "---> Downloading csv from Brickset..."
|
echo -ne "---> Downloading csv from Brickset..."
|
||||||
wget https://brickset.com/exportscripts/instructions &> /dev/null
|
wget https://brickset.com/exportscripts/instructions &> /dev/null
|
||||||
if [[ "$?" != 0 ]]; then
|
if [[ "$?" != 0 ]]; then
|
||||||
echo "Error... Exiting!"
|
echo "Error... Exiting!"
|
||||||
exit;
|
exit;
|
||||||
else
|
else
|
||||||
echo "Done!"
|
echo "Done!"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "----> Converting csv with ',' to '|'..."
|
|
||||||
head -1 instructions | sed 's/,/|/g' > instructions.csv
|
|
||||||
sed 1,1d instructions | sed -r 's/("[^",]+),([^",]+")/\1###\2/g' | awk -F, '{print $1,$2,$3,$4,$5,$6}' | sed 's/###/,/g' | sed 's/" "/"|"/g' >> instructions.csv
|
|
||||||
|
|
||||||
while IFS='|' read -r ID LINK NAME DESC ADDED RETRIVED
|
echo "----> Converting csv with ',' to '|'..."
|
||||||
do
|
head -1 instructions | sed 's/,/|/g' > instructions.csv
|
||||||
|
sed 1,1d instructions | sed -r 's/("[^",]+),([^",]+")/\1###\2/g' | awk -F, '{print $1,$2,$3,$4,$5,$6}' | sed 's/###/,/g' | sed 's/" "/"|"/g' >> instructions.csv
|
||||||
|
|
||||||
if [ "$firstline" = 0 ]; then
|
LINES=$(wc -l instructions | awk '{ print $1 }')
|
||||||
firstline=1
|
CURRENT=0
|
||||||
else
|
|
||||||
tID=$(sed -e 's/^"//' -e 's/"$//' <<<"$ID")
|
|
||||||
tLINK=$(sed -e 's/^"//' -e 's/"$//' <<<"$LINK")
|
|
||||||
#tNAME=$(sed -e 's/^"//' -e 's/"$//' <<<"$NAME")
|
|
||||||
ttNAME=$(cut -d, -f1-2 sets.csv | grep -w $tID | cut -d, -f2)
|
|
||||||
setYEAR=$(grep -w $tID sets.csv | cut -d, -f3)
|
|
||||||
setNAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $ttNAME)
|
|
||||||
themeID=$(grep -w $tID sets.csv | cut -d, -f4)
|
|
||||||
themeName=$(awk -F',' -v id="$themeID" '$1 == id' themes.csv)
|
|
||||||
IFS=',' read -r -a array <<< "$themeName"
|
|
||||||
tempID=${array[2]}
|
|
||||||
if [[ $tempID != "" ]]; then
|
|
||||||
while [[ $tempID != "" ]]
|
|
||||||
do
|
|
||||||
tthemeName=$(awk -F',' -v id="$tempID" '$1 == id' themes.csv)
|
|
||||||
IFS=',' read -r -a tArray <<< "$tthemeName"
|
|
||||||
tempID=${tArray[2]}
|
|
||||||
themeName=${tArray[1]}
|
|
||||||
done
|
|
||||||
else
|
|
||||||
themeName=${array[1]}
|
|
||||||
fi
|
|
||||||
|
|
||||||
setTHEME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $themeName)
|
while IFS='|' read -r ID LINK NAME DESC ADDED RETRIVED
|
||||||
|
do
|
||||||
tADDED=$(sed -e 's/^"//' -e 's/"$//' <<<"$ADDED")
|
|
||||||
tDESC=$(sed -e 's/^"//' -e 's/"$//' <<<"$DESC")
|
|
||||||
ttDESC=$(echo $tDESC | grep -Eo '\s[0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/ //g' | sed 's/\//_of_/g')
|
|
||||||
|
|
||||||
if [ -z "$ttDESC" ]; then
|
|
||||||
setDESC="_"
|
|
||||||
else
|
|
||||||
setDESC="_("$ttDESC")_"
|
|
||||||
fi
|
|
||||||
|
|
||||||
tFilename=""$tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf"
|
#$PC=$((200*$CURRENT/$LINES % 2 + 100*$CURRENT/$LINES))
|
||||||
filename=$tFilename
|
#CURRENT=$((CURRENT + 1))
|
||||||
|
if [ "$firstline" = 0 ]; then
|
||||||
|
firstline=1
|
||||||
|
else
|
||||||
|
tID=$(sed -e 's/^"//' -e 's/"$//' <<<"$ID")
|
||||||
|
tLINK=$(sed -e 's/^"//' -e 's/"$//' <<<"$LINK")
|
||||||
|
#tNAME=$(sed -e 's/^"//' -e 's/"$//' <<<"$NAME")
|
||||||
|
ttNAME=$(cut -d, -f1-2 sets.csv | grep -w $tID | cut -d, -f2)
|
||||||
|
setYEAR=$(grep -w $tID sets.csv | cut -d, -f3)
|
||||||
|
setNAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $ttNAME)
|
||||||
|
themeID=$(grep -w $tID sets.csv | cut -d, -f4)
|
||||||
|
themeName=$(awk -F',' -v id="$themeID" '$1 == id' themes.csv)
|
||||||
|
IFS=',' read -r -a array <<< "$themeName"
|
||||||
|
tempID=${array[2]}
|
||||||
|
if [[ $tempID != "" ]]; then
|
||||||
|
while [[ $tempID != "" ]]
|
||||||
|
do
|
||||||
|
tthemeName=$(awk -F',' -v id="$tempID" '$1 == id' themes.csv)
|
||||||
|
IFS=',' read -r -a tArray <<< "$tthemeName"
|
||||||
|
tempID=${tArray[2]}
|
||||||
|
themeName=${tArray[1]}
|
||||||
|
done
|
||||||
|
else
|
||||||
|
themeName=${array[1]}
|
||||||
|
fi
|
||||||
|
|
||||||
if [ -f "$downloadFolder/$filename" ]; then
|
setTHEME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $themeName)
|
||||||
if [[ $error_level = 2 ]]; then
|
|
||||||
echo "-> $tID exists. Skipping..."
|
tADDED=$(sed -e 's/^"//' -e 's/"$//' <<<"$ADDED")
|
||||||
echo "$filename exists." >> $logfile
|
tDESC=$(sed -e 's/^"//' -e 's/"$//' <<<"$DESC")
|
||||||
fi
|
|
||||||
else
|
ttDESC=$(echo $tDESC | grep -Eo '[^0-9][0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/[^A-Za-z0-9/]//g' | sed 's/\//_of_/g')
|
||||||
if [[ "$tDESC" = "{No longer listed at LEGO.com}" ]] ; then
|
#ttDESC=$(echo $tDESC | grep -Eo '\s[0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/ //g' | sed 's/\//_of_/g')
|
||||||
if [[ $error_level = 1 || $error_level = 2 ]]; then
|
|
||||||
echo "-> $tID is not available. Skipping..."
|
if [ -z "$ttDESC" ]; then
|
||||||
echo "$filename is not available." >> $logfile
|
setDESC="_"
|
||||||
fi
|
else
|
||||||
else
|
setDESC="_("$ttDESC")_"
|
||||||
echo -ne "--> $tID downloading now..."
|
fi
|
||||||
curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename"
|
#PC=$(echo $CURRENT $LINES | awk '{print 100*$1/$2}')
|
||||||
if [ -f "$downloadFolder/$filename" ]; then
|
tFilename=""$tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf"
|
||||||
echo "Done! > $filename"
|
filename=$tFilename
|
||||||
else
|
|
||||||
echo "ERROR!"
|
if [ -f "$downloadFolder/$filename" ]; then
|
||||||
if [[ $error_level = 1 || $error_level = 2 ]]; then
|
if [[ $error_level = 2 ]]; then
|
||||||
echo
|
echo "-> $tID exists. Skipping..."
|
||||||
echo "--> Not downloaded. Try again manually..."
|
echo "$filename exists." >> $logfile
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
if [[ "$tDESC" = "{No longer listed at LEGO.com}" ]] ; then
|
||||||
|
if [[ $error_level = 1 || $error_level = 2 ]]; then
|
||||||
|
echo "-> $tID is not available. Skipping..."
|
||||||
|
echo "$filename is not available." >> $logfile
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo -ne "--> $tID downloading now..."
|
||||||
|
curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename"
|
||||||
|
if [ -f "$downloadFolder/$filename" ]; then
|
||||||
|
echo "Done! > $filename"
|
||||||
|
else
|
||||||
|
echo "ERROR!"
|
||||||
|
if [[ $error_level = 1 || $error_level = 2 ]]; then
|
||||||
|
echo "--> Not downloaded. Try again manually..."
|
||||||
echo "$filename was not downloaded. Check CURL" >> $logfile
|
echo "$filename was not downloaded. Check CURL" >> $logfile
|
||||||
fi
|
fi
|
||||||
|
#random sleep in order to not look like a script
|
||||||
|
sleep $(( ( RANDOM % 5 ) + 1 ))
|
||||||
fi
|
fi
|
||||||
#random sleep in order to not look like a script
|
|
||||||
sleep $(( ( RANDOM % 1 ) + 1 ))
|
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
Loading…
Reference in New Issue
Block a user