From: Glaudiston Gomes da Silva
Date: Tue, 8 May 2012 01:48:35 +0000 (-0300)
Subject: first commit, shell scripts for google books search and download.
X-Git-Url: http://vcs.maemo.org/git/?a=commitdiff_plain;h=5d3e10944016cc29129e9f429a4f9ce1eacf5df9;hp=f48bad52feb871bbba668e57ee14d8a341456463;p=gbooks

first commit, shell scripts for google books search and download.
---

diff --git a/bash/findGoogleBooks.sh b/bash/findGoogleBooks.sh
new file mode 100755
index 0000000..e0ecea8
--- /dev/null
+++ b/bash/findGoogleBooks.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Search Google Books and print the "id" and "title" lines of the JSON reply.
+#
+# Usage: findGoogleBooks.sh SEARCH_TERMS...
+# Environment:
+#   APP_KEY  Google API key; defaults to the key bundled with this script.
+
+if [ "$#" -eq 0 ]; then
+  printf 'Usage: %s SEARCH_TERMS...\n' "${0##*/}" >&2
+  exit 2
+fi
+
+# Overridable so callers can supply their own key instead of the bundled one.
+APP_KEY="${APP_KEY:-AIzaSyBhXxun8ggcSd6kiJLi7FDrWnXYI8_vk6E}"
+# "$*" joins all arguments into one string; "$@" in a scalar assignment is
+# ill-defined.
+SEARCH_STR="$*"
+
+# NOTE(review): SEARCH_STR is not percent-encoded, so a literal '&' or '#' in
+# the search terms alters the request. --no-check-certificate disables TLS
+# verification; drop it where a current CA bundle is available.
+wget --no-check-certificate -U "Mozilla 5.0" -qO- \
+  "https://www.googleapis.com/books/v1/volumes?q=${SEARCH_STR}&key=${APP_KEY}" |
+  grep -E '("title"|"id")'
diff --git a/bash/getgooglebooks.sh b/bash/getgooglebooks.sh
new file mode 100755
index 0000000..4363fdc
--- /dev/null
+++ b/bash/getgooglebooks.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# Download the preview page images of a Google Books volume.
+#
+# Usage: getgooglebooks.sh BOOK_ID
+#
+# Saves "[GoogleBooks] BOOK_ID Page PID.png" for every page whose image URL
+# the server returns, and URLPAGES_BOOK_ID.txt with the sorted, unique "src"
+# entries seen. Cookies are kept in google-cookies.txt.
+
+GBOOK_ID="$1"
+if [ -z "$GBOOK_ID" ]; then
+  printf 'Usage: %s BOOK_ID\n' "${0##*/}" >&2
+  exit 2
+fi
+
+# An array keeps "Mozilla/5.0" a single argument; the former string form sent
+# the quote characters to the server as part of the User-Agent.
+WGET_CMD=(wget --no-check-certificate --save-cookies google-cookies.txt
+  --load-cookies google-cookies.txt -q -U "Mozilla/5.0")
+SRCLIST=""  # newline-terminated "src":"URL" entries collected so far
+PAGE_IDS=() # page ids ("pid") listed by the PA1 reply
+
+# Fetch the page data for page id $1 and append its "src" entries to SRCLIST.
+# For PA1 the page ids found in the reply are stored in PAGE_IDS.
+# Call it in the current shell, not inside $(...): a subshell would discard
+# the updates to SRCLIST and PAGE_IDS.
+FN_GET_GOOGLEBOOK() {
+  local pg="$1" reply fields srcs
+  local url="http://books.google.com/books?id=${GBOOK_ID}&pg=${pg}&jscmd=click3"
+  reply="$("${WGET_CMD[@]}" -O- "$url")" || return
+  # %b expands backslash escapes such as \u0026 (as echo -e did before), and
+  # each comma-separated field goes on its own line.
+  fields="$(printf '%b' "$reply" | tr ',' '\n')"
+  srcs="$(grep 'src' <<<"$fields")"
+  if [ -n "$srcs" ]; then
+    SRCLIST+="${srcs}"$'\n'
+  fi
+  if [ "$pg" == "PA1" ]; then
+    mapfile -t PAGE_IDS < <(grep -E '^\{"pid":' <<<"$fields" |
+      sed 's/{"pid":"\([^"]*\).*/\1/')
+  fi
+}
+
+FN_GET_GOOGLEBOOK PA1
+AllPagesCount="${#PAGE_IDS[@]}"
+echo "debug1 allpages=${PAGE_IDS[*]}, allpagescount=$AllPagesCount"
+for pg in "${PAGE_IDS[@]}"; do
+  if ! grep -qF -- "&pg=${pg}&" <<<"$SRCLIST"; then
+    PgCount="$(sort -u <<<"$SRCLIST" | grep -c .)"
+    echo -ne "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bGeting the page $pg [$PgCount/$AllPagesCount][$((PgCount * 100 / AllPagesCount))%]"
+    FN_GET_GOOGLEBOOK "${pg}"
+  fi
+  # Only the first matching entry is used so that duplicates do not start
+  # several downloads writing to the same file.
+  pgurl="$(grep -m 1 -F -- "&pg=${pg}&" <<<"$SRCLIST" |
+    sed 's/^"src":"\([^"]*\)".*/\1/')"
+  if [ -n "$pgurl" ]; then
+    "${WGET_CMD[@]}" -O "[GoogleBooks] ${GBOOK_ID} Page ${pg}.png" "$pgurl" &
+  fi
+done
+printf '%s' "$SRCLIST" | sort -u >"./URLPAGES_${GBOOK_ID}.txt"
+# Downloads run in the background; wait so the script ends only when done.
+wait