|
|
@ -1,38 +1,70 @@ |
|
|
|
#!/bin/bash |
|
|
|
############################## |
|
|
|
# |
|
|
|
# This script uses the brickset instructions file to get links and set numbers. |
|
|
|
# In order to get themes, we get themes from rebrickable. |
|
|
|
# If a booklet isn't available on lego.com, then brickinstructions.com is tested. |
|
|
|
# |
|
|
|
# Files are saved as: |
|
|
|
# $tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" |
|
|
|
# 1190-1_Retro_Buggy_(Town_1999).pdf |
|
|
|
# |
|
|
|
# If a set contains multiple files, the file name will be: |
|
|
|
# 2520-1_(1_of_2)_Battle_Arena_(Ninjago_2011).pdf |
|
|
|
# 2520-1_(2_of_2)_Battle_Arena_(Ninjago_2011).pdf |
|
|
|
# |
|
|
|
# Req: |
|
|
|
# bash, awk, grep, sed, curl, wget |
|
|
|
# |
|
|
|
# Get themes.csv and sets.csv from https://rebrickable.com/downloads |
|
|
|
# |
|
|
|
# error_level: |
|
|
|
# 0 no errors reported |
|
|
|
# 1 Download errors reported |
|
|
|
# 2 Existing files and download errors reported |
|
|
|
# |
|
|
|
############################## |
|
|
|
|
|
|
|
echo "----> Starting Download of all LEGO instructions from https://brickset.com/exportscripts/instructions" |
|
|
|
firstline=0 |
|
|
|
|
|
|
|
##### CHANGE HERE ##### |
|
|
|
logfile="lego_errors.log" |
|
|
|
downloadFolder="../Instructions" |
|
|
|
error_level=1 |
|
|
|
##### STOP CHANGE ##### |
|
|
|
#!/bin/bash |
|
|
|
############################## |
|
|
|
# |
|
|
|
# This script uses the brickset instructions file to get links and set numbers. |
|
|
|
# In order to get themes, we get themes from rebrickable. |
|
|
|
# If a booklet isn't available on lego.com, then brickinstructions.com is tested. |
|
|
|
# |
|
|
|
# Files are saved as: |
|
|
|
# $tID""$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" |
|
|
|
# 1190-1_Retro_Buggy_(Town_1999).pdf |
|
|
|
# |
|
|
|
# If a set contains multiple files, the file name will be: |
|
|
|
# 2520-1_(1_of_2)_Battle_Arena_(Ninjago_2011).pdf |
|
|
|
# 2520-1_(2_of_2)_Battle_Arena_(Ninjago_2011).pdf |
|
|
|
# |
|
|
|
# Req: |
|
|
|
# bash, awk, grep, sed, curl, wget |
|
|
|
# |
|
|
|
# Get themes.csv and sets.csv from https://rebrickable.com/downloads |
|
|
|
# |
|
|
|
# error_level: |
|
|
|
# 0 no errors reported |
|
|
|
# 1 Download errors reported |
|
|
|
# 2 Existing files and download errors reported |
|
|
|
# |
|
|
|
############################## |
|
|
|
|
|
|
|
echo "----> Starting Download of all LEGO instructions from https://brickset.com/exportscripts/instructions" |
|
|
|
firstline=0 |
|
|
|
|
|
|
|
##### CHANGE HERE ##### |
|
|
|
logfile="lego_errors.log" |
|
|
|
downloadFolder="../Instructions" |
|
|
|
error_level=2 |
|
|
|
delay=1 # set to 1 for not making delays, set to 0 for random delays from 1-5 seconds. |
|
|
|
DEBUG=false # set to true to stop all downloads and just do file checks. |
|
|
|
##### STOP CHANGE ##### |
|
|
|
|
|
|
|
|
|
|
|
##### |
|
|
|
# |
|
|
|
# NOTES: Try and test other sites if LEGO or brickinstructions aren't working, e.g. Peeron or Brickset. |
|
|
|
# |
|
|
|
##### |
|
|
|
#deTe=0 |
|
|
|
#firstline=0 |
|
|
|
#while IFS=',' read -r ID NAME YEAR THEME_ID NUM_PARTS |
|
|
|
#do |
|
|
|
# if [ "$firstline" = 0 ]; then |
|
|
|
# firstline=1 |
|
|
|
# else |
|
|
|
# |
|
|
|
# if [[ ${ID#0} -gt 1000 && "$YEAR" -gt 1990 && "$NUM_PARTS" -gt 50 ]]; then |
|
|
|
# deTe=$((deTe+1)) |
|
|
|
# if [ "$deTe" -gt 5 ]; then |
|
|
|
# exit |
|
|
|
# fi |
|
|
|
# echo "$ID, $YEAR, $NAME, $NUM_PARTS" |
|
|
|
# awk -F '|' '$1 ~ /"^'$D'"/' instructions.csv |
|
|
|
# fi |
|
|
|
# fi |
|
|
|
#done < sets.csv |
|
|
|
#exit |
|
|
|
# |
|
|
|
# |
|
|
|
##### |
|
|
|
|
|
|
|
|
|
|
|
echo "" > $logfile |
|
|
|
|
|
|
@ -48,6 +80,7 @@ |
|
|
|
echo "Error... Exiting!" |
|
|
|
exit; |
|
|
|
else |
|
|
|
mv "Brickset-instructions.csv" "instructions" |
|
|
|
echo "Done!" |
|
|
|
fi |
|
|
|
fi |
|
|
@ -68,28 +101,39 @@ |
|
|
|
fi |
|
|
|
} |
|
|
|
|
|
|
|
while IFS='|' read -r ID LINK NAME DESC ADDED RETRIVED |
|
|
|
while IFS='|' read -r ID LINK DESC ADDED RETRIVED |
|
|
|
do |
|
|
|
|
|
|
|
#$PC=$((200*$CURRENT/$LINES % 2 + 100*$CURRENT/$LINES)) |
|
|
|
#CURRENT=$((CURRENT + 1)) |
|
|
|
if [ "$firstline" = 0 ]; then |
|
|
|
firstline=1 |
|
|
|
else |
|
|
|
else |
|
|
|
|
|
|
|
if [ "$DEBUG" = true ]; then |
|
|
|
echo "$ID" |
|
|
|
echo "$LINK" |
|
|
|
#echo "$NAME" |
|
|
|
echo "$DESC" |
|
|
|
echo "$ADDED" |
|
|
|
echo "$RETRIVED" |
|
|
|
fi |
|
|
|
tID=$(sed -e 's/^"//' -e 's/"$//' <<<"$ID") |
|
|
|
tLINK=$(sed -e 's/^"//' -e 's/"$//' <<<"$LINK") |
|
|
|
#tNAME=$(sed -e 's/^"//' -e 's/"$//' <<<"$NAME") |
|
|
|
ttNAME=$(cut -d, -f1-2 sets.csv | grep -w $tID | cut -d, -f2) |
|
|
|
setYEAR=$(grep -w $tID sets.csv | cut -d, -f3) |
|
|
|
setNAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $ttNAME) |
|
|
|
themeID=$(grep -w $tID sets.csv | cut -d, -f4) |
|
|
|
themeName=$(awk -F',' -v id="$themeID" '$1 == id' themes.csv) |
|
|
|
ttNAME=$(cut -d, -f1-2 "$SETS" | grep -w "$tID" | cut -d, -f2) |
|
|
|
#echo "ttNAME: $ttNAME" |
|
|
|
setYEAR=$(grep -w "$tID" "$SETS" | cut -d, -f3) |
|
|
|
setNAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< "$ttNAME") |
|
|
|
#echo "setNAME: $setNAME" |
|
|
|
themeID=$(grep -w "$tID" "$SETS" | cut -d, -f4) |
|
|
|
themeName=$(awk -F',' -v id="$themeID" '$1 == id' "$THEMES") |
|
|
|
IFS=',' read -r -a array <<< "$themeName" |
|
|
|
tempID=${array[2]} |
|
|
|
if [[ $tempID != "" ]]; then |
|
|
|
if [[ $tempID != "" ]]; then |
|
|
|
while [[ $tempID != "" ]] |
|
|
|
do |
|
|
|
tthemeName=$(awk -F',' -v id="$tempID" '$1 == id' themes.csv) |
|
|
|
tthemeName=$(awk -F',' -v id="$tempID" '$1 == id' "$THEMES") |
|
|
|
IFS=',' read -r -a tArray <<< "$tthemeName" |
|
|
|
tempID=${tArray[2]} |
|
|
|
themeName=${tArray[1]} |
|
|
@ -98,24 +142,49 @@ |
|
|
|
themeName=${array[1]} |
|
|
|
fi |
|
|
|
|
|
|
|
setTHEME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< $themeName) |
|
|
|
setTHEME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< "$themeName") |
|
|
|
|
|
|
|
tADDED=$(sed -e 's/^"//' -e 's/"$//' <<<"$ADDED") |
|
|
|
tDESC=$(sed -e 's/^"//' -e 's/"$//' <<<"$DESC") |
|
|
|
#echo "DESC: $DESC" |
|
|
|
|
|
|
|
ttDESC=$(echo $tDESC | grep -Eo '[^0-9][0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/[^A-Za-z0-9/]//g' | sed 's/\//_of_/g') |
|
|
|
#ttDESC=$(echo $tDESC | grep -Eo '\s[0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/ //g' | sed 's/\//_of_/g') |
|
|
|
|
|
|
|
#echo $tDESC |
|
|
|
ttDESC=$(echo "$tDESC" | grep -Eo '[^0-9][0-9]{1}\s?\/\s?[0-9]{1,2}(\s|$)' | sed 's/[^A-Za-z0-9/]//g' | sed 's/\//_of_/g') |
|
|
|
#echo "TEST"$(echo "$tDESC" | grep -Eo '[^0-9][0-9]{1}\s?\/\s?[0-9]{1,2}(\s|$)') |
|
|
|
|
|
|
|
#echo "--->ttDESC: $ttDESC" |
|
|
|
#ttDESC=$(echo $tDESC | grep -Eo '\s[0-9]{1}\s?\/\s?[0-9]{1,2}' | sed 's/ //g' | sed 's/\//_of_/g') |
|
|
|
if [ -z "$ttDESC" ]; then |
|
|
|
etDESC="_" |
|
|
|
tDESC="_" |
|
|
|
if [ "$DEBUG" = true ]; then |
|
|
|
echo "ttDESC is empty" |
|
|
|
fi |
|
|
|
else |
|
|
|
tDESC="_("$ttDESC")_" |
|
|
|
tDESC="_($ttDESC)_" |
|
|
|
if [ "$DEBUG" = true ]; then |
|
|
|
echo "ttDESC is not empty" |
|
|
|
fi |
|
|
|
fi |
|
|
|
#echo "tDESC: $tDESC" |
|
|
|
#echo "setDESC: $setDESC" |
|
|
|
#PC=$(echo $CURRENT $LINES | awk '{print 100*$1/$2}') |
|
|
|
tFilename=""$tID"_"$setDESC""${setNAME// /_}"_("$setTHEME"_"$setYEAR").pdf" |
|
|
|
tFilename="$tID$tDESC${setNAME// /_}_($setTHEME""_""$setYEAR).pdf" |
|
|
|
filename=$tFilename |
|
|
|
|
|
|
|
#echo $filename |
|
|
|
|
|
|
|
|
|
|
|
if [ "$DEBUG" = true ]; then |
|
|
|
echo "--->FILENAME: $filename" |
|
|
|
#echo "-->================================================<--" |
|
|
|
CURRENT=$((CURRENT+1)) |
|
|
|
if [ "$CURRENT" -gt 30 ]; then |
|
|
|
exit |
|
|
|
fi |
|
|
|
fi |
|
|
|
if [ -f "$downloadFolder/$filename" ]; then |
|
|
|
#echo "$downloadFolder/$filename" |
|
|
|
if [[ $error_level = 2 ]]; then |
|
|
|
echo "-> $tID exists. Skipping..." |
|
|
|
echo "$filename exists." >> $logfile |
|
|
@ -123,40 +192,68 @@ |
|
|
|
else |
|
|
|
if [[ "$tDESC" = "{No longer listed at LEGO.com}" ]] ; then |
|
|
|
echo -ne "-> $tID testing links..." |
|
|
|
if validate_url $tLINK; then |
|
|
|
if validate_url "$tLINK"; then |
|
|
|
echo -ne "Found on LEGO.com... Downloading..." |
|
|
|
curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename" |
|
|
|
if [ -f "$downloadFolder/$filename" ]; then |
|
|
|
echo "Done! > $filename" |
|
|
|
else |
|
|
|
|
|
|
|
if [ "$DEBUG" != true ]; then |
|
|
|
curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L "$tLINK" --silent --output "$downloadFolder/$filename" |
|
|
|
fi |
|
|
|
if [ $(head -c 4 "$downloadFolder/$filename") = "%PDF" ]; then |
|
|
|
if [ -f "$downloadFolder/$filename" ]; then |
|
|
|
echo "Done! > $filename" |
|
|
|
else |
|
|
|
echo "ERROR!" |
|
|
|
if [[ $error_level = 1 || $error_level = 2 ]]; then |
|
|
|
echo "--> Not downloaded. Try again manually..." |
|
|
|
echo "$filename was not downloaded. Check CURL" >> $logfile |
|
|
|
fi |
|
|
|
fi |
|
|
|
else |
|
|
|
echo "ERROR!" |
|
|
|
rm "$downloadFolder/$filename" |
|
|
|
if [[ $error_level = 1 || $error_level = 2 ]]; then |
|
|
|
echo "--> Not downloaded. Try again manually..." |
|
|
|
echo "--> File is not a PDF..." |
|
|
|
echo "$filename was not downloaded. Check CURL" >> $logfile |
|
|
|
fi |
|
|
|
fi |
|
|
|
#random sleep in order to not look like a script |
|
|
|
sleep $(( ( RANDOM % 5 ) + 1 )) |
|
|
|
if [[ $delay = 0 ]]; then |
|
|
|
sleep $(( ( RANDOM % 5 ) + 1 )) |
|
|
|
fi |
|
|
|
else |
|
|
|
#test |
|
|
|
|
|
|
|
biID=$(sed -e 's/[^0-9_]/_/g' <<< $tID) |
|
|
|
biID=$(sed -e 's/[^0-9_]/_/g' <<< "$tID") |
|
|
|
biLink="https://lego.brickinstructions.com/pdfdrop/" |
|
|
|
if validate_url "$biLink$biID.pdf"; then |
|
|
|
echo -ne "Found on BrickInstructions.com... Downloading..." |
|
|
|
curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L "$biLink$tID.pdf" --silent --output "$downloadFolder/$filename" |
|
|
|
if [ -f "$downloadFolder/$filename" ]; then |
|
|
|
echo "Done! > $filename" |
|
|
|
|
|
|
|
if [ "$DEBUG" != true ]; then |
|
|
|
curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L "$biLink$tID.pdf" --silent --output "$downloadFolder/$filename" |
|
|
|
fi |
|
|
|
if [ $(head -c 4 "$downloadFolder/$filename") = "%PDF" ]; then |
|
|
|
if [ -f "$downloadFolder/$filename" ]; then |
|
|
|
echo "Done! > $filename" |
|
|
|
else |
|
|
|
echo "ERROR!" |
|
|
|
if [[ $error_level = 1 || $error_level = 2 ]]; then |
|
|
|
echo "--> Not downloaded. Try again manually..." |
|
|
|
echo "$filename was not downloaded. Check CURL" >> $logfile |
|
|
|
fi |
|
|
|
fi |
|
|
|
else |
|
|
|
echo "ERROR!" |
|
|
|
if [[ $error_level = 1 || $error_level = 2 ]]; then |
|
|
|
echo "--> Not downloaded. Try again manually..." |
|
|
|
echo "ERROR!" |
|
|
|
rm "$downloadFolder/$filename" |
|
|
|
if [[ $error_level = 1 || $error_level = 2 ]]; then |
|
|
|
echo "--> File is not a PDF..." |
|
|
|
echo "$filename was not downloaded. Check CURL" >> $logfile |
|
|
|
fi |
|
|
|
fi |
|
|
|
fi |
|
|
|
#random sleep in order to not look like a script |
|
|
|
sleep $(( ( RANDOM % 5 ) + 1 )) |
|
|
|
else |
|
|
|
if [[ $delay = 0 ]]; then |
|
|
|
sleep $(( ( RANDOM % 5 ) + 1 )) |
|
|
|
fi |
|
|
|
else |
|
|
|
if [[ $error_level = 1 || $error_level = 2 ]]; then |
|
|
|
echo "-> $tID is not available. Skipping..." |
|
|
|
echo "$filename is not available." >> $logfile |
|
|
@ -166,20 +263,42 @@ |
|
|
|
|
|
|
|
else |
|
|
|
echo -ne "--> $tID downloading now..." |
|
|
|
curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L $tLINK --silent --output "$downloadFolder/$filename" |
|
|
|
if [ -f "$downloadFolder/$filename" ]; then |
|
|
|
echo "Done! > $filename" |
|
|
|
|
|
|
|
if [ "$DEBUG" != true ]; then |
|
|
|
curl -H "Mozilla/5.0 (platform; rv:75.0) Gecko/20100101 Firefox/75.0" -L "$tLINK" --silent --output "$downloadFolder/$filename" |
|
|
|
fi |
|
|
|
if [ $(head -c 4 "$downloadFolder/$filename") = "%PDF" ]; then |
|
|
|
if [ -f "$downloadFolder/$filename" ]; then |
|
|
|
echo "Done! > $filename" |
|
|
|
else |
|
|
|
echo "ERROR!" |
|
|
|
if [[ $error_level = 1 || $error_level = 2 ]]; then |
|
|
|
echo "--> Not downloaded. Try again manually..." |
|
|
|
echo "$filename was not downloaded. Check CURL" >> $logfile |
|
|
|
fi |
|
|
|
#random sleep in order to not look like a script |
|
|
|
if [[ $delay = 0 ]]; then |
|
|
|
sleep $(( ( RANDOM % 5 ) + 1 )) |
|
|
|
fi |
|
|
|
fi |
|
|
|
else |
|
|
|
echo "ERROR!" |
|
|
|
echo "ERROR!" |
|
|
|
rm "$downloadFolder/$filename" |
|
|
|
if [[ $error_level = 1 || $error_level = 2 ]]; then |
|
|
|
echo "--> Not downloaded. Try again manually..." |
|
|
|
echo "--> File is not a PDF..." |
|
|
|
echo "$filename was not downloaded. Check CURL" >> $logfile |
|
|
|
fi |
|
|
|
#random sleep in order to not look like a script |
|
|
|
sleep $(( ( RANDOM % 5 ) + 1 )) |
|
|
|
fi |
|
|
|
fi |
|
|
|
fi |
|
|
|
fi |
|
|
|
done < instructions.csv |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|