minimal package structure
[gbooks] / bash / getgooglebooks.sh
#!/bin/bash
# getgooglebooks.sh - fetch the page images of one Google Books volume.
# The volume id is taken from the first command-line argument.

# Accumulator for the "src" entries collected by FN_GET_GOOGLEBOOK; starts empty.
SRCLIST=""

# Google Books volume id, as passed by the caller.
GBOOK_ID="$1"
#######################################
# Request the page-info response for one page id of the current volume and
# remember every "src" entry it contains.
# Globals:
#   GBOOK_ID (read)    - volume id placed in the request URL.
#   WGET_CMD (written) - flat wget command line. It stays a plain string
#                        because callers expand it unquoted ($WGET_CMD), so
#                        no argument in it may contain whitespace or quotes.
#   SRCLIST  (written) - grows by a literal backslash-n pair followed by every
#                        comma-separated field of the response that contains
#                        "src". Consumers expand it with `echo -e`.
# Arguments:
#   $1 - page id to request (e.g. PA1).
# Outputs:
#   Only when $1 is PA1: the value of every field that starts with {"pid":,
#   space-separated with a trailing space, on stdout.
#   A diagnostic on stderr when the request fails.
# Returns:
#   0 on success, wget's exit status when the request fails (SRCLIST is left
#   untouched in that case).
#######################################
FN_GET_GOOGLEBOOK() {
        local pg="$1"
        local url response status

        url="http://books.google.com/books?id=${GBOOK_ID}&pg=${pg}&jscmd=click3"

        # No embedded quote characters here: inside a flat string they are not
        # shell syntax and would be sent to the server as part of the agent.
        WGET_CMD="wget --no-check-certificate --save-cookies google-cookies.txt --load-cookies google-cookies.txt -q -U Mozilla/5.0"

        # WGET_CMD is split into words on purpose; the URL is quoted so that
        # its '?' can never be treated as a glob.
        # shellcheck disable=SC2086
        response="$(${WGET_CMD} -O- "${url}")"
        status=$?
        if [[ ${status} -ne 0 ]]; then
                printf 'FN_GET_GOOGLEBOOK: request for page %s failed (wget exit %s)\n' \
                        "${pg}" "${status}" >&2
                return "${status}"
        fi

        # Newlines become spaces and commas become newlines, which gives one
        # field per line without letting the shell glob-expand the response.
        SRCLIST="${SRCLIST}\n$(printf '%s' "${response}" | tr '\n,' ' \n' | grep 'src')"

        # The PA1 request doubles as the index of page ids.
        if [[ "${pg}" == "PA1" ]]; then
                echo -e "${response}" \
                        | tr ',' '\n' \
                        | grep -E '^\{"pid":' \
                        | sed 's/{"pid":"\([^"]*\).*/\1/g' \
                        | tr '\n' ' '
        fi
        return 0
}
# ---------------------------------------------------------------------------
# Main driver: obtain the list of page ids, make sure a "src" entry is known
# for each one, download every page image in the background and finally dump
# the collected entries to ./URLPAGES_<volume id>.txt.
# ---------------------------------------------------------------------------

if [[ -z "${GBOOK_ID}" ]]; then
        printf 'Usage: %s <google-books-volume-id>\n' "${0##*/}" >&2
        exit 2
fi

# The command substitution runs the function in a subshell, so only its
# stdout (the page-id list) survives; SRCLIST and WGET_CMD are filled in by
# the calls made from the loop below.
ALL_PAGES="$(FN_GET_GOOGLEBOOK PA1)"

# Count the ids as words: the list carries a trailing space, which a
# line-based count would report as one extra page.
AllPagesCount=$(( $(wc -w <<<"${ALL_PAGES}") ))
echo "debug1 allpages=${ALL_PAGES}, allpagescount=${AllPagesCount}"

if (( AllPagesCount == 0 )); then
        printf 'No page ids were returned for volume %s\n' "${GBOOK_ID}" >&2
        exit 1
fi

# ALL_PAGES is deliberately unquoted: it is a whitespace-separated id list.
for pg in ${ALL_PAGES}; do
        # Request the page only when no collected entry mentions it yet.
        if ! echo -e "${SRCLIST}" | grep -qF -- "&pg=${pg}&"; then
                # Unique non-empty entries gathered so far (SRCLIST starts with
                # an empty line, which must not be counted).
                PgCount="$(echo -e "${SRCLIST}" | sort -u | grep -c .)"
                echo -ne "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bGeting the page $pg [$PgCount/$AllPagesCount][$((PgCount*100/AllPagesCount))%]"
                # Its stdout is the page-id list when pg is PA1; that is not
                # wanted in the middle of the progress output.
                FN_GET_GOOGLEBOOK "${pg}" >/dev/null
        fi

        # The loop reads from a process substitution instead of a pipeline so
        # that the background downloads are children of this shell and the
        # final `wait` can see them.
        # NOTE(review): `read` without -r drops backslashes from the entry, as
        # the original did; confirm whether the URLs rely on that before
        # adding -r.
        # shellcheck disable=SC2162
        while read pgurl; do
                img_url="$(printf '%s\n' "${pgurl}" | sed 's/^"src":"\([^"]*\)".*/\1/g')"
                # WGET_CMD is a flat command string and is split on purpose.
                # shellcheck disable=SC2086
                ${WGET_CMD} -O "[GoogleBooks] ${GBOOK_ID} Page ${pg}.png" "${img_url}" &
        done < <(echo -e "${SRCLIST}" | grep -F -- "&pg=${pg}&")
done

# Do not return to the caller while page images are still being written.
wait

echo -e "${SRCLIST}" | sort -u >"./URLPAGES_${GBOOK_ID}.txt"
29