Update.
[scripts.git] / arxiv-rename-pdf.sh
1 #!/bin/bash
2
3 #########################################################################
4 # This program is free software: you can redistribute it and/or modify  #
5 # it under the terms of the version 3 of the GNU General Public License #
6 # as published by the Free Software Foundation.                         #
7 #                                                                       #
8 # This program is distributed in the hope that it will be useful, but   #
9 # WITHOUT ANY WARRANTY; without even the implied warranty of            #
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU      #
11 # General Public License for more details.                              #
12 #                                                                       #
13 # You should have received a copy of the GNU General Public License     #
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.  #
15 #                                                                       #
16 # Written by and Copyright (C) Francois Fleuret                         #
17 # Contact <francois.fleuret@idiap.ch> for comments & bug reports        #
18 #########################################################################
19
20
# Usage: arxiv-rename-pdf.sh <arxiv-url | file.pdf> ...
#
# For every argument:
#   * if it starts with "http", download it with wget into "<id>.pdf",
#     where <id> is the last path component of the URL (without ".pdf");
#     otherwise treat it as an existing local file whose basename
#     (minus ".pdf") is the arXiv id;
#   * fetch https://arxiv.org/abs/<id> and read the citation_author /
#     citation_title <meta> tags from it;
#   * append a BibTeX @article entry to $ARXIV_BIBFILE unless the title
#     is already present there;
#   * move the PDF to $ARXIV_BIBDIR under a name built from the authors,
#     the title, the year and the id (spaces replaced by underscores).
#
# Required environment: ARXIV_BIBFILE, ARXIV_BIBDIR (exit status 2 if unset).

set -e
set -o pipefail

# Brace groups (not subshells) so that `exit` terminates the script itself
# instead of relying on `set -e` to notice the subshell's status.
[[ "${ARXIV_BIBFILE}" ]] || { echo >&2 "Define \$ARXIV_BIBFILE"; exit 2; }
[[ "${ARXIV_BIBDIR}" ]] || { echo >&2 "Define \$ARXIV_BIBDIR"; exit 2; }

tmp=$(mktemp /tmp/arxiv-bib.sh.XXXXXX)

# Remove the scratch file on every exit path, including `set -e` aborts.
trap 'rm -f -- "${tmp}"' EXIT

while [[ "$1" ]]
do

    if [[ $1 =~ ^http ]]
    then
        # The id is the last path component of the URL; drop a trailing
        # ".pdf" so that both .../pdf/ID and .../pdf/ID.pdf give ID.
        id="${1##*/}"
        id="${id%.pdf}"
        if [[ -z "${id}" ]]
        then
            echo >&2 "Cannot extract an arXiv id from $1"
            exit 1
        fi
        filename="${id}.pdf"
        # Save under the exact name used by the `mv` below; plain `wget URL`
        # would name the file after the URL, which need not end in ".pdf".
        # NOTE(review): the URL is fetched as given, so an .../abs/ID URL
        # would store the HTML abstract page -- confirm callers pass PDF URLs.
        wget -O "${filename}" "$1"
    else
        filename="$1"
        id="$(basename -- "${filename}" .pdf)"
    fi

    while [[ -f "$1.part" ]] # dealing with firefox's download
    do
        echo "Waiting for $1.part to vanish"
        sleep 1
    done

    echo -n "Fetching info from arxiv.org ... "

    # --fail: do not parse an HTTP error page as if it were the abstract.
    if ! curl -fsS "https://arxiv.org/abs/${id}" > "${tmp}"
    then
        echo >&2 "failed to fetch https://arxiv.org/abs/${id}"
        exit 1
    fi

    echo "done."

    # Title and year are shared by the BibTeX entry and the new file name.
    # `|| true`: a missing tag is reported below rather than silently
    # aborting through `set -e` / `pipefail`.
    TITLE=$(grep '<meta name="citation_title"' "${tmp}" | sed -e 's/^.*content="\([^"]*\)".*$/\1/') || true
    if [[ -z "${TITLE}" ]]
    then
        echo >&2 "No citation_title found for ${id}"
        exit 1
    fi

    # The first two characters of the id are used as the two-digit year.
    YEAR=$(printf '%s\n' "${id}" | sed -e 's/^\(..\).*$/20\1/')

    ######################################################################
    # Bibtex entry

    # Authors formatted as "Lastname, F." and joined with " and ".
    AUTHORS=""

    while read -r line
    do
        [[ "${AUTHORS}" ]] && AUTHORS="${AUTHORS} and "
        AUTHORS="${AUTHORS}${line}"
    done < <(grep '<meta name="citation_author"' "${tmp}" | sed -e 's/^.*content="\([^,]*, .\).*$/\1./')

    # -F: the title is a literal string, not a regular expression.
    # Matching lines are printed on purpose so the user sees the duplicate.
    if grep -F -- "${TITLE}" "${ARXIV_BIBFILE}"
    then
        echo
        echo "This article seems to be already in ${ARXIV_BIBFILE}"
        echo
    else

        cat >> "${ARXIV_BIBFILE}" <<EOF

@article{arxiv-${id},
  author={${AUTHORS}},
  title={${TITLE}},
  journal={CoRR},
  volume={abs/${id}},
  year={${YEAR}},
  url={https://arxiv.org/pdf/${id}}
}
EOF

    fi

    ######################################################################
    # Rename the file

    # Last names only, space separated; more than three collapse to
    # "<first author> et al".
    AUTHORS=""
    FIRST_AUTHOR=""
    nb_authors=0

    while read -r line
    do
        if [[ "${AUTHORS}" ]]
        then
            AUTHORS="${AUTHORS} "
        else
            FIRST_AUTHOR="${line}"
        fi
        nb_authors=$((nb_authors+1))
        AUTHORS="${AUTHORS}${line}"
    done < <(grep '<meta name="citation_author"' "${tmp}" | sed -e 's/^.*content="\([^,.]*\), .*$/\1/')

    if [[ ${nb_authors} -gt 3 ]]
    then
        AUTHORS="${FIRST_AUTHOR} et al"
    fi

    newname="${AUTHORS} - ${TITLE} ${YEAR} ${id}.pdf"
    newname="${newname// /_}"
    # A "/" coming from the title or an author name would otherwise be
    # taken as a directory separator by mv.
    newname="${newname//\//_}"

    mv -v -- "${filename}" "${ARXIV_BIBDIR}/${newname}"

    ######################################################################

    shift

done