Wikimedia Canada | Git repositories - eccc_to_commons.git/blob - mediawiki_post.sh
Less naive rate limiter
[eccc_to_commons.git] / mediawiki_post.sh
1 #!/bin/bash
2
3 # mediawiki_post - Recursively send files in a directory to a Mediawiki instance
4 # Copyright (C) 2020 Pierre Choffet
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
# Shell options: stop at the first failing command, trace every command as it
# is executed, and make a pipeline fail when any of its stages fails.
set -o errexit -o xtrace
set -o pipefail

# API endpoint the requests are posted to, and the namespace that is prefixed
# to every page title built by the upload loop.
ENDPOINT="https://commons.wikimedia.org/w/api.php"
NAMESPACE="Data"
UPLOAD_MAX_RATE=15 # Per minute

# Files (relative to the current directory) in which credentials, tokens and
# the curl cookie jar are kept.
USERNAME_PATH="login_username"
PASSWORD_PATH="login_password"
LOGIN_TOKEN_PATH="login_token"
CSRF_TOKEN_PATH="csrf_token"
COOKIE_JAR="cookie_jar"

# First command-line argument: the folder to upload.
SOURCE=$1
33
# readLoginToken - Load the login token stored in LOGIN_TOKEN_PATH into the
# global LOGIN_TOKEN, percent-encoded for use in a form-urlencoded POST body.
#
# Globals read:    LOGIN_TOKEN_PATH
# Globals written: LOGIN_TOKEN
# Returns:         non-zero when the token file cannot be read
readLoginToken() {
  LOGIN_TOKEN=$(<"${LOGIN_TOKEN_PATH}")
  # Encode EVERY '+' and '\' (global '//' substitution): a single '/' would
  # only replace the first occurrence and leave later ones unencoded.
  LOGIN_TOKEN="${LOGIN_TOKEN//+/%2B}"
  LOGIN_TOKEN="${LOGIN_TOKEN//\\/%5C}"
}
39
# readCSRFToken - Load the CSRF token stored in CSRF_TOKEN_PATH into the
# global CSRF_TOKEN, percent-encoded for use in a form-urlencoded POST body.
#
# Globals read:    CSRF_TOKEN_PATH
# Globals written: CSRF_TOKEN
# Returns:         non-zero when the token file cannot be read
readCSRFToken() {
  CSRF_TOKEN=$(<"${CSRF_TOKEN_PATH}")
  # Encode EVERY '+' and '\' (global '//' substitution): a single '/' would
  # only replace the first occurrence and leave later ones unencoded.
  CSRF_TOKEN="${CSRF_TOKEN//+/%2B}"
  CSRF_TOKEN="${CSRF_TOKEN//\\/%5C}"
}
45
# requestLoginToken - Ask the API for a login token and print it on stdout.
#
# Globals read: COOKIE_JAR, ENDPOINT
# Outputs:      the value of //tokens/@logintoken from the XML response
# Returns:      non-zero when the request fails, when the response holds no
#               login token, or when the token is empty
requestLoginToken() {
  # Declared separately from the assignments: 'local var=$(cmd)' would return
  # the status of 'local' and hide a failing curl/xmlstarlet.
  local body login_token

  body=$(curl -X POST -d 'action=query' -d 'meta=tokens' -d 'type=login' \
    -d 'format=xml' -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}") \
    || return

  login_token=$(printf '%s\n' "${body}" \
    | xmlstarlet sel -t -v '//tokens/@logintoken' -) || return

  if [[ -z "${login_token}" ]]; then
    echo 'No login token found in the API response.' >&2
    return 1
  fi

  printf '%s\n' "${login_token}"
}
52
# requestCSRFToken - Ask the API for a CSRF token and print it on stdout.
#
# Globals read: COOKIE_JAR, ENDPOINT
# Outputs:      the value of //tokens/@csrftoken from the XML response
# Returns:      non-zero when the request fails, when the response holds no
#               CSRF token, or when the token is empty
requestCSRFToken() {
  # Declared separately from the assignments: 'local var=$(cmd)' would return
  # the status of 'local' and hide a failing curl/xmlstarlet.
  local body csrf_token

  body=$(curl -X POST -d 'action=query' -d 'meta=tokens' \
    -d 'format=xml' -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}") \
    || return

  csrf_token=$(printf '%s\n' "${body}" \
    | xmlstarlet sel -t -v '//tokens/@csrftoken' -) || return

  if [[ -z "${csrf_token}" ]]; then
    echo 'No CSRF token found in the API response.' >&2
    return 1
  fi

  printf '%s\n' "${csrf_token}"
}
59
# login - Authenticate against the API with action=login.
#
# Credentials are read from USERNAME_PATH / PASSWORD_PATH when both files
# exist, otherwise they are asked for interactively. On success they are
# written back to those files.
#
# Globals read:    LOGIN_TOKEN_PATH, USERNAME_PATH, PASSWORD_PATH, COOKIE_JAR,
#                  ENDPOINT, LOGIN_TOKEN (set by readLoginToken)
# Globals written: USERNAME, PASSWORD
# Files written:   LOGIN_TOKEN_PATH, and on success USERNAME_PATH and
#                  PASSWORD_PATH
# Outputs:         progress messages on stdout, new diagnostics on stderr
# Returns:         0 once the API answers "Success"; exits the script with
#                  status 1 on any other outcome
login() {
  local body result attempt
  local xtrace_was_on=0

  # Nearly every command below carries the password; suspend command tracing
  # so that it is not printed, and restore it before returning.
  if [[ $- == *x* ]]; then
    xtrace_was_on=1
    set +x
  fi

  if [[ ! -f "${LOGIN_TOKEN_PATH}" ]]; then
    requestLoginToken > "${LOGIN_TOKEN_PATH}"
  fi

  readLoginToken

  if [[ ! -f "${USERNAME_PATH}" || ! -f "${PASSWORD_PATH}" ]]; then
    echo "What wiki account to use?"
    # -r keeps backslashes in the typed values intact.
    read -r -p 'Username: ' USERNAME
    read -r -s -p 'Password: ' PASSWORD
    # The silent read leaves the cursor on the prompt line.
    echo
  else
    USERNAME=$(<"${USERNAME_PATH}")
    PASSWORD=$(<"${PASSWORD_PATH}")
  fi

  # Two attempts at most: the second one only happens after the API answered
  # "NeedToken" and a fresh token has been fetched.
  for attempt in 1 2; do
    if ! body=$(curl -X POST -d 'action=login' \
      --data-urlencode "lgname=${USERNAME}" \
      --data-urlencode "lgpassword=${PASSWORD}" \
      -d "lgtoken=${LOGIN_TOKEN}" -d 'format=xml' \
      -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}"); then
      echo 'Login request failed.' >&2
      exit 1
    fi

    # A response without //login/@result leaves the result empty; the
    # catch-all branch below reports it.
    result=$(printf '%s\n' "${body}" \
      | xmlstarlet sel -t -v '//login/@result' -) || true

    case "${result}" in
      NeedToken)
        # The stored token was rejected: fetch a new one and try again.
        requestLoginToken > "${LOGIN_TOKEN_PATH}"
        readLoginToken
        ;;
      Success)
        printf '%s\n' "${USERNAME}" > "${USERNAME_PATH}"
        # Create the password file readable by its owner only.
        (
          umask 077
          printf '%s\n' "${PASSWORD}" > "${PASSWORD_PATH}"
        )
        echo 'Logged in.'
        if [[ "${xtrace_was_on}" -eq 1 ]]; then
          set -x
        fi
        return 0
        ;;
      Failed)
        echo 'Login failed. Wrong credentials?'
        exit 1
        ;;
      *)
        echo "Unknown login result: ${result}. Exiting."
        exit 1
        ;;
    esac
  done

  echo 'Login failed: the API kept asking for a new login token.' >&2
  exit 1
}
100
# Main script: validate the argument, log in, fetch a CSRF token, then post
# every *.tab file found under SOURCE as the text of the page
# "<NAMESPACE>:<path relative to SOURCE>".
if [[ -z "${SOURCE}" ]]; then
  echo 'Upload files to Mediawiki.'
  echo 'Usage: mediawiki_post.sh <source folder>'
  exit 1
fi

# Checked before logging in: a failing find inside the process substitution
# below would not stop the script, it would just upload nothing.
if [[ ! -d "${SOURCE}" ]]; then
  printf 'Source folder not found: %s\n' "${SOURCE}" >&2
  exit 1
fi

login

requestCSRFToken > "${CSRF_TOKEN_PATH}"
readCSRFToken

# Rate limiter state: the wall-clock minute currently being counted and the
# number of successful uploads made during it.
MINUTE=$(date +%M)
MINUTE_UPLOADS=0
while IFS= read -r -d '' -u 9; do
  URI_PATH="${NAMESPACE}:$(realpath --relative-to="${SOURCE}" "${REPLY}")"
  BODY=$(curl -X POST -d 'action=edit' \
    --data-urlencode "title=${URI_PATH}" \
    --data-urlencode "text@${REPLY}" \
    -d "token=${CSRF_TOKEN}" -d 'format=xml' \
    -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}")
  # A response without /api/edit/@result makes the pipeline fail; keep going
  # with an empty result so that the catch-all branch can report it.
  RESULT=$(printf '%s\n' "${BODY}" \
    | xmlstarlet sel -t -v '/api/edit/@result' -) || RESULT=''

  case "${RESULT}" in
    Success)
      # One date call so that minute and second belong to the same instant.
      NOW=$(date '+%M %S')
      MINUTE_NOW=${NOW% *}
      SECOND_NOW=${NOW#* }

      # "10#" forces base 10: date zero-pads its output and values such as
      # 08 or 09 are otherwise rejected as invalid octal numbers.
      if ((10#${MINUTE_NOW} != 10#${MINUTE})); then
        MINUTE=${MINUTE_NOW}
        MINUTE_UPLOADS=0
      fi

      # Count this upload in the minute it has been attributed to.
      MINUTE_UPLOADS=$((MINUTE_UPLOADS + 1))

      if ((MINUTE_UPLOADS >= UPLOAD_MAX_RATE)); then
        # Quota reached: wait for the next minute to begin.
        sleep "$((60 - 10#${SECOND_NOW}))"
        MINUTE=$(date +%M)
        MINUTE_UPLOADS=0
      fi

      echo "Everything went right. Continue…"
      ;;
    *)
      echo "Unknown code: ${RESULT}. Exiting."
      printf 'Response received for %s:\n%s\n' "${URI_PATH}" "${BODY}" >&2
      exit 1
      ;;
  esac
done 9< <(find "${SOURCE}" -type f -name '*.tab' -print0)