-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetch_book.sh
executable file
·187 lines (164 loc) · 6.41 KB
/
fetch_book.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/usr/bin/env bash
set -euo pipefail
origin=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) || exit
escape_url() {
printf %s "$1" | jq -sRr @uri
}
trim() {
sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' <<< "$1"
}
parseNumber() {
echo "$1" | tr ',' '.' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
}
if [ $# -eq 1 ] && [ -f "$1" ]; then
filename=$(basename "$1")
if [ "${filename##*.}" = "docx" ]; then
echo "Provided file is a .docx, converting to text"
filename_b=$(basename "$1" .docx)
echo "filename_b=$filename_b"
# unzip -p "$1" word/document.xml | sed -e 's/<\/w:p>/ /g; s/<[^>]\{1,\}>/ /g; s/[^[:print:]]\{1,\}/ /g'
docx2txt.sh "$1"
file="${PWD}/${filename_b}.txt"
cd "$(dirname "$1")"
mv "${filename_b}.txt" "${file}"
echo "--------"
echo "STOP will most probably need post processing of resulting ${file} to follow requirements"
echo ""
echo "title | author | rating | description"
echo ""
echo " Where title & author are mandatory, rating falls back to 0 (unrated) and description is optional"
echo " Rating can be X/Y or X, rating value is always converted to 10 upper bound (3 is then 3/10, 3.5/5 is then 7/10)"
echo "--------"
exit 1
else
file="$1"
fi
echo "Reading book information from file ${file}"
pivot=0
page_size=20
limit=$((pivot+page_size))
line_nb=0
# FIXME ensure last line is properly read even if no newline at the end
while IFS= read -r line; do
[ -z "${line}" ] && continue
echo ""
echo "${line_nb} ---------------"
line_nb=$((line_nb+1))
# there are Google API rate limits, doing by batch can help iterating until limit is ignored
[ ${line_nb} -lt "${pivot}" ] && continue
[ ${line_nb} -gt "${limit}" ] && break
title=$(trim "$(cut -d'|' -f1 <<< "${line}")")
author=$(trim "$(cut -d'|' -f2 <<< "${line}")")
rating_str=$(echo "${line}" | cut -d'|' -f3)
if [ -n "${rating_str}" ]; then
rating_value=$(parseNumber "$(cut -d/ -f1 <<< "${rating_str}")")
echo "rating_value=$rating_value"
rating_upper_bound=$(parseNumber "$(cut -d/ -f2 <<< "${rating_str}")")
# we want rating to be in [0-10]
rating=$(bc <<< "${rating_value} * 10 / ${rating_upper_bound}")
else
rating=0
fi
description=$(trim "$(cut -d'|' -f4 <<< "${line}")")
# call ourselves with values instead of file
"${BASH_SOURCE[0]}" "${title}" "${author}" "${rating}" "${description}"
done < "${file}"
elif [ $# -gt 4 ]; then
echo "Usage:"
echo " either $0 \"title\" \"author\" [rating] [description]"
echo " - rating is an integer in [0-10] (0 means unrated)"
echo " or $0 file.txt"
echo " where file.txt contains lines with the following format:"
echo " title|author|rating|description"
echo " rating is an integer in [0-10] (0 means unrated)"
echo " rating can also be value/upper (e.g. 3/5) to be converted to a 10-based rating"
exit 1
else
if [ -z "${3:-""}" ] || [ "${3:-"0"}" = "0" ]; then
rating="0 # TBD"
else
rating=${3}
fi
if [ -z "${4:-""}" ]; then
description=""
else
description="${4}"
fi
mkdir -p "${origin}/content/book"
mkdir -p "${origin}/content/cover"
book_title_query=$(trim "$(tr "[:upper:]" "[:lower:]" <<< "${1}")")
book_author_query=$(trim "$(tr "[:upper:]" "[:lower:]" <<< "${2}")")
book_query="'${book_title_query}' by '${book_author_query}'"
echo "Query book information for ${book_query}"
book_data=$(curl -s --get \
--data "key=${GOOGLE_BOOKS_API_KEY}" \
--data "langRestrict=fr" \
--data-urlencode "q=${book_title_query} by ${book_author_query}" \
'https://www.googleapis.com/books/v1/volumes' \
-H 'Accept: application/json')
isbn=""
if [ "$(jq -r .totalItems <<< "${book_data}")" -gt 0 ]; then
selected_volume=$(jq -r 'first(.items[] | select(.volumeInfo.industryIdentifiers[] | select(.type == "ISBN_13")))' <<< "${book_data}")
title="$(jq -r .volumeInfo.title <<< "${selected_volume}")"
author="$(jq -r .volumeInfo.authors[0] <<< "${selected_volume}")"
echo "Retrieved some book information for ${book_query}: '${title}' by '${author}'"
isbn="$(jq -r '.volumeInfo.industryIdentifiers[] | select(.type == "ISBN_13") | .identifier' <<< "${selected_volume}")"
if [ "${isbn}" = "null" ] || [ -z "${isbn}" ]; then
isbn=""
else
echo "Found ISBN 13 '${isbn}'"
fi
if [ -z "${description}" ]; then
description="$(jq -r '.volumeInfo.description' <<< "${selected_volume}")"
fi
cover_full_url=$(jq -r .volumeInfo.imageLinks.thumbnail <<< "${selected_volume}")
if [ "${cover_full_url}" = "null" ] || [ -z "${cover_full_url}" ]; then
echo " ⚠︎ Can't find book cover for ${book_query} (ISBN: ${isbn})"
else
cover_short_url=$(cut -d'&' -f1 <<< "${cover_full_url}")
cover_clean_url="${cover_short_url}&printsec=frontcover&img=1&source=gbs_api"
# FIXME what about non JPEG files?
cover_file="${origin}/content/cover/${isbn}.jpg"
if [ -f "${cover_file}" ]; then
echo "Cover already downloaded for ${book_query} (ISBN: ${isbn})"
elif [ -n "${isbn}" ]; then
echo "Downloading cover for ${book_query} (${isbn})"
echo "Cover URL: ${cover_clean_url}"
curl -s "${cover_clean_url}" > "${cover_file}"
else
echo "Won't try to download cover for ${book_query}, no ISBN to store result"
fi
echo "Cover available: ${cover_file}"
fi
cover_query=$(escape_url "${book_title_query} ${book_author_query}")
echo "Not satisfied with the cover? You can try to find the cover on:"
echo " - Google image: https://www.google.fr/search?q=${cover_query}&udm=2"
echo " - Amazon: https://www.amazon.fr/s?k=${cover_query}&i=stripbooks"
fi
if [ -z "${isbn}" ]; then
echo "Can't find any book or ISBN for ${book_query}"
title="${book_title_query}"
author="${book_author_query}"
isbn="?????????????"
fi
# book file (markdown)
book_file="${origin}/content/book/${isbn}.md"
if [ ! -f "${book_file}" ]; then
uuid=$(uuidgen | tr '[:upper:]' '[:lower:]')
echo "Creating a new book ${book_file} for '${title}' by '${author}'"
echo "---
uuid: ${uuid}
REF: \"${book_query}\"
title: \"${title}\"
author: \"${author}\"
rating: ${rating}
read_date: $(date "+%Y-%m-%d") # TBD
isbn: \"${isbn}\"
---
${description}
" > "${book_file}"
else
echo "Book ${book_file} already exists"
head -n 8 "${book_file}"
fi
fi