Wikimedia Canada | Git repositories - eccc_to_commons.git/blob - mediawiki_post.sh
Add beginning offset
[eccc_to_commons.git] / mediawiki_post.sh
1 #!/bin/bash
2
3 # mediawiki_post - Recursively send files in a directory to a Mediawiki instance
4 # Copyright (C) 2020 Pierre Choffet
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
# Abort on the first failing command, including a failure inside a pipeline.
set -e
set -o pipefail

# Command tracing is opt-in. With `set -x` every curl invocation is echoed to
# stderr, including the account password and the API tokens it carries.
# Run with MEDIAWIKI_POST_TRACE=1 to get the trace back when debugging.
if [[ -n "${MEDIAWIKI_POST_TRACE:-}" ]]; then
  set -x
fi

# MediaWiki API endpoint that receives every request.
ENDPOINT='https://commons.wikimedia.org/w/api.php'
# Namespace prefixed to each page title ("<NAMESPACE>:<relative path>").
NAMESPACE='Data'
UPLOAD_MAX_RATE=15 # Per minute
# When non-empty, every file up to and including this one (path relative to
# the source folder) is skipped before uploads start.
RESTART_AFTER_PATH='' # i.e. weather.gc.ca/Monthly/1013241.tab

# Files, relative to the working directory, that cache the credentials, the
# tokens and the session cookies.
USERNAME_PATH='login_username'
PASSWORD_PATH='login_password'
LOGIN_TOKEN_PATH='login_token'
CSRF_TOKEN_PATH='csrf_token'
COOKIE_JAR='cookie_jar'

# Folder whose *.tab files are uploaded (first command-line argument).
SOURCE="${1}"

readonly ENDPOINT NAMESPACE UPLOAD_MAX_RATE RESTART_AFTER_PATH
readonly USERNAME_PATH PASSWORD_PATH LOGIN_TOKEN_PATH CSRF_TOKEN_PATH COOKIE_JAR
readonly SOURCE
34
# Load the cached login token into LOGIN_TOKEN, escaped so it can be passed
# verbatim to `curl -d` in a form-encoded request body.
# Globals:
#   LOGIN_TOKEN_PATH (read)    - file holding the raw token
#   LOGIN_TOKEN      (written) - token with '+' and '\' percent-encoded
readLoginToken() {
  LOGIN_TOKEN=$(<"${LOGIN_TOKEN_PATH}")
  # In a form-encoded body a raw '+' decodes to a space, so it must be escaped
  # together with the backslash. Escape every occurrence, not just the first.
  LOGIN_TOKEN="${LOGIN_TOKEN//+/%2B}"
  LOGIN_TOKEN="${LOGIN_TOKEN//\\/%5C}"
}
40
# Load the cached CSRF token into CSRF_TOKEN, escaped so it can be passed
# verbatim to `curl -d` in a form-encoded request body.
# Globals:
#   CSRF_TOKEN_PATH (read)    - file holding the raw token
#   CSRF_TOKEN      (written) - token with '+' and '\' percent-encoded
readCSRFToken() {
  CSRF_TOKEN=$(<"${CSRF_TOKEN_PATH}")
  # In a form-encoded body a raw '+' decodes to a space, so it must be escaped
  # together with the backslash. Escape every occurrence, not just the first.
  CSRF_TOKEN="${CSRF_TOKEN//+/%2B}"
  CSRF_TOKEN="${CSRF_TOKEN//\\/%5C}"
}
46
# Ask the API for a login token and print it on stdout.
# Globals:
#   ENDPOINT   (read) - API URL
#   COOKIE_JAR (read) - cookie file handed to curl for both reading and writing
# Outputs:
#   The raw (unescaped) login token, followed by a newline.
# Returns:
#   0 on success; non-zero when the request fails or the answer holds no token.
requestLoginToken() {
  # Declared separately from the assignments: `local var=$(cmd)` would hide
  # the exit status of cmd and let an empty token through.
  local body login_token

  if ! body=$(curl -X POST -d 'action=query' -d 'meta=tokens' -d 'type=login' \
    -d 'format=xml' -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}"); then
    echo 'Could not request a login token.' >&2
    return 1
  fi

  if ! login_token=$(printf '%s\n' "${body}" \
    | xmlstarlet sel -t -v '//tokens/@logintoken' -) \
    || [[ -z "${login_token}" ]]; then
    echo 'No login token found in the API answer.' >&2
    return 1
  fi

  printf '%s\n' "${login_token}"
}
53
# Ask the API for a CSRF (edit) token and print it on stdout.
# Globals:
#   ENDPOINT   (read) - API URL
#   COOKIE_JAR (read) - cookie file handed to curl for both reading and writing
# Outputs:
#   The raw (unescaped) CSRF token, followed by a newline.
# Returns:
#   0 on success; non-zero when the request fails or the answer holds no token.
requestCSRFToken() {
  # Declared separately from the assignments: `local var=$(cmd)` would hide
  # the exit status of cmd and let an empty token through.
  local body csrf_token

  if ! body=$(curl -X POST -d 'action=query' -d 'meta=tokens' \
    -d 'format=xml' -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}"); then
    echo 'Could not request a CSRF token.' >&2
    return 1
  fi

  if ! csrf_token=$(printf '%s\n' "${body}" \
    | xmlstarlet sel -t -v '//tokens/@csrftoken' -) \
    || [[ -z "${csrf_token}" ]]; then
    echo 'No CSRF token found in the API answer.' >&2
    return 1
  fi

  printf '%s\n' "${csrf_token}"
}
60
# Log in to the wiki, prompting for credentials unless both are cached.
# Globals:
#   LOGIN_TOKEN_PATH, USERNAME_PATH, PASSWORD_PATH (read; files written)
#   COOKIE_JAR, ENDPOINT (read)
#   LOGIN_TOKEN (read; refreshed through readLoginToken)
#   USERNAME, PASSWORD (written)
# Outputs:
#   Progress messages on stdout, errors on stderr.
# Returns:
#   0 once logged in. Exits the script with status 1 on any failure.
login() {
  local body result attempt

  if [[ ! -f "${LOGIN_TOKEN_PATH}" ]]; then
    requestLoginToken > "${LOGIN_TOKEN_PATH}" || exit 1
  fi

  readLoginToken

  if [[ ! -f "${USERNAME_PATH}" || ! -f "${PASSWORD_PATH}" ]]; then
    echo "What wiki account to use?"
    # -r keeps backslashes in the credentials intact.
    read -r -p 'Username: ' USERNAME
    read -r -s -p 'Password: ' PASSWORD
    # -s swallowed the newline typed after the password.
    echo
  else
    USERNAME="$(<"${USERNAME_PATH}")"
    PASSWORD="$(<"${PASSWORD_PATH}")"
  fi

  # Two attempts at most: when the token is refused, fetch a fresh one and
  # actually log in again instead of carrying on unauthenticated.
  for attempt in 1 2; do
    # NOTE(review): the password travels on curl's command line and is
    # therefore visible to other local users through the process list.
    if ! body=$(curl -X POST -d 'action=login' \
      --data-urlencode "lgname=${USERNAME}" \
      --data-urlencode "lgpassword=${PASSWORD}" \
      -d "lgtoken=${LOGIN_TOKEN}" -d 'format=xml' \
      -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}"); then
      echo 'Login request failed.' >&2
      exit 1
    fi

    # If xmlstarlet fails (e.g. the answer has no result attribute), fall
    # through to the "unknown result" branch rather than dying silently.
    result=$(printf '%s\n' "${body}" \
      | xmlstarlet sel -t -v '//login/@result' -) || result=''

    case "${result}" in
      # WrongToken: presumably what a stale cached token yields - TODO confirm
      # against the MediaWiki action=login documentation.
      NeedToken | WrongToken)
        requestLoginToken > "${LOGIN_TOKEN_PATH}" || exit 1
        readLoginToken
        ;;
      Success)
        # Cache the credentials for later runs, readable by the owner only.
        (
          umask 077
          printf '%s\n' "${USERNAME}" > "${USERNAME_PATH}"
          printf '%s\n' "${PASSWORD}" > "${PASSWORD_PATH}"
        )
        # umask only affects new files; tighten pre-existing ones as well.
        chmod 600 -- "${USERNAME_PATH}" "${PASSWORD_PATH}"
        echo 'Logged in.'
        return 0
        ;;
      Failed)
        echo 'Login failed. Wrong credentials?' >&2
        exit 1
        ;;
      *)
        echo "Unknown login result: ${result}. Exiting." >&2
        exit 1
        ;;
    esac
  done

  echo 'Login token still refused after a refresh. Exiting.' >&2
  exit 1
}
101
# Refuse to run without a source folder: show the usage text and stop.
if [[ -z "${SOURCE}" ]]; then
  echo 'Upload files to Mediawiki.' >&2
  echo 'Usage: mediawiki_post.sh <source folder>' >&2
  exit 1
fi

# The upload loop reads from `find` through a process substitution, whose
# exit status is never checked: without this guard a mistyped folder would
# upload nothing and still exit with status 0.
if [[ ! -d "${SOURCE}" ]]; then
  echo "Source folder not found: ${SOURCE}" >&2
  exit 1
fi
108
# Open the session, then fetch the CSRF token that every edit request needs.
login

requestCSRFToken > "${CSRF_TOKEN_PATH}"
readCSRFToken

# Start offset: 1 means "upload from the first file"; 0 means "skip files
# until RESTART_AFTER_PATH has been seen".
START_OFFSET_REACHED=0
if [[ -z "${RESTART_AFTER_PATH}" ]]; then
  START_OFFSET_REACHED=1
fi

# Rate limiting state: uploads counted within the clock minute MINUTE.
MINUTE_UPLOADS=0
MINUTE=$(date +%M)
124
# Count one successful upload and pause until the next clock minute once
# UPLOAD_MAX_RATE uploads have been made within the current one.
# Globals:
#   UPLOAD_MAX_RATE (read)    - uploads allowed per clock minute
#   MINUTE          (updated) - minute (as printed by `date +%M`) being counted
#   MINUTE_UPLOADS  (updated) - uploads counted within MINUTE
throttleUploads() {
  local minute_now second_now

  # A single date call, so that minute and second describe the same instant.
  read -r minute_now second_now < <(date '+%M %S')
  MINUTE_UPLOADS=$((MINUTE_UPLOADS + 1))

  if [[ "${minute_now}" != "${MINUTE}" ]]; then
    # A new minute has started: restart the count.
    MINUTE=${minute_now}
    MINUTE_UPLOADS=0
  elif ((MINUTE_UPLOADS >= UPLOAD_MAX_RATE)); then
    # date zero-pads its output; without the explicit base, "08" and "09"
    # are rejected as invalid octal numbers and abort the script.
    sleep $((60 - 10#${second_now}))
    MINUTE=$(date +%M)
    MINUTE_UPLOADS=0
  fi
}

# Upload every *.tab file found under SOURCE as "<NAMESPACE>:<relative path>".
# File names arrive NUL-delimited on descriptor 9, leaving stdin untouched.
while IFS= read -r -d '' -u 9; do
  # Prepare path
  URI_PATH="${NAMESPACE}:$(realpath --relative-to="${SOURCE}" "${REPLY}")"

  # Check start offset: skip every file up to and including the restart path.
  if ((START_OFFSET_REACHED == 0)); then
    # Quoted on both sides: paths may contain whitespace or glob characters.
    if [[ "${URI_PATH}" == "${NAMESPACE}:${RESTART_AFTER_PATH}" ]]; then
      START_OFFSET_REACHED=1
    fi

    continue
  fi

  # Upload
  BODY=$(curl -X POST -d 'action=edit' --data-urlencode "title=${URI_PATH}" \
    --data-urlencode "text@${REPLY}" -d "token=${CSRF_TOKEN}" -d 'format=xml' \
    -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}")
  # If xmlstarlet fails (e.g. the answer has no edit result), fall through to
  # the error branch below instead of letting `set -e` end the script silently.
  RESULT=$(printf '%s\n' "${BODY}" \
    | xmlstarlet sel -t -v '/api/edit/@result' -) || RESULT=''

  case "${RESULT}" in
    Success)
      throttleUploads
      echo "Everything went right. Continue…"
      ;;
    *)
      echo "Unknown code: ${RESULT}. Exiting." >&2
      echo "Failed while uploading: ${REPLY}" >&2
      exit 1
      ;;
  esac
done 9< <(find "${SOURCE}" -type f -name '*.tab' -print0)

# A restart path that was never met means nothing at all was uploaded.
if [[ -n "${RESTART_AFTER_PATH:-}" ]] && ((START_OFFSET_REACHED == 0)); then
  echo "Restart path not found under ${SOURCE}: ${RESTART_AFTER_PATH}" >&2
  exit 1
fi