#!/bin/bash
# mediawiki_post - Recursively send files in a directory to a Mediawiki instance
# Copyright (C) 2020 Pierre Choffet
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Abort on the first failing command and on any failing pipeline stage.
set -e
set -o pipefail
# Command tracing is opt-in: an unconditional `set -x` prints every curl
# invocation, including the account password and the session tokens.
# Run with MEDIAWIKI_POST_TRACE=1 to get the trace back.
if [ -n "${MEDIAWIKI_POST_TRACE:-}" ]
then
	set -x
fi

# MediaWiki API endpoint that receives every request.
ENDPOINT='https://commons.wikimedia.org/w/api.php'
# Namespace prefix prepended to every page title.
NAMESPACE='Data'
UPLOAD_MAX_RATE=15 # Per minute
# Path, relative to the source directory, of a file to resume after: every
# file listed up to and including this one is skipped. Empty uploads everything.
RESTART_AFTER_PATH='' # e.g. weather.gc.ca/Monthly/1013241.tab
# Files used to cache the credentials, the tokens and the session cookies.
USERNAME_PATH='login_username'
PASSWORD_PATH='login_password'
LOGIN_TOKEN_PATH='login_token'
CSRF_TOKEN_PATH='csrf_token'
COOKIE_JAR='cookie_jar'
# Directory to upload (first positional argument).
SOURCE="${1}"
# Load the cached login token from LOGIN_TOKEN_PATH into the global
# LOGIN_TOKEN, percent-encoding '+' and '\' so the value can be passed
# to curl with a plain -d.
# Globals: LOGIN_TOKEN_PATH (read), LOGIN_TOKEN (written)
readLoginToken() {
	LOGIN_TOKEN=$(cat "${LOGIN_TOKEN_PATH}")
	# '//' replaces every occurrence; a single '/' would only encode the
	# first '+' and the first '\'.
	LOGIN_TOKEN="${LOGIN_TOKEN//+/%2B}"
	LOGIN_TOKEN="${LOGIN_TOKEN//\\/%5C}"
}
# Load the cached CSRF token from CSRF_TOKEN_PATH into the global
# CSRF_TOKEN, percent-encoding '+' and '\' so the value can be passed
# to curl with a plain -d.
# Globals: CSRF_TOKEN_PATH (read), CSRF_TOKEN (written)
readCSRFToken() {
	CSRF_TOKEN=$(cat "${CSRF_TOKEN_PATH}")
	# '//' replaces every occurrence; a single '/' would only encode the
	# first '+' and the first '\'.
	CSRF_TOKEN="${CSRF_TOKEN//+/%2B}"
	CSRF_TOKEN="${CSRF_TOKEN//\\/%5C}"
}
# Ask the API for a login token and print it on stdout.
# Globals: COOKIE_JAR, ENDPOINT (read)
# Returns: non-zero, printing nothing, when the request fails or the
#          response carries no login token.
requestLoginToken() {
	# Declared separately from the assignments: `local x=$(cmd)` would hide
	# the exit status of cmd.
	local body login_token
	body=$(curl -X POST -d 'action=query' -d 'meta=tokens' -d 'type=login' \
		-d 'format=xml' -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}") || return
	login_token=$(printf '%s\n' "${body}" \
		| xmlstarlet sel -t -v '//tokens/@logintoken' -) || return
	[ -n "${login_token}" ] || return 1
	printf '%s\n' "${login_token}"
}
# Ask the API for a CSRF token and print it on stdout.
# Globals: COOKIE_JAR, ENDPOINT (read)
# Returns: non-zero, printing nothing, when the request fails or the
#          response carries no CSRF token.
requestCSRFToken() {
	# Declared separately from the assignments: `local x=$(cmd)` would hide
	# the exit status of cmd.
	local body csrf_token
	body=$(curl -X POST -d 'action=query' -d 'meta=tokens' \
		-d 'format=xml' -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}") || return
	csrf_token=$(printf '%s\n' "${body}" \
		| xmlstarlet sel -t -v '//tokens/@csrftoken' -) || return
	[ -n "${csrf_token}" ] || return 1
	printf '%s\n' "${csrf_token}"
}
# Log in to the wiki.
# Uses the cached login token, username and password when their files exist;
# otherwise fetches a token and prompts for the credentials. After a
# successful login the credentials are saved for the next run.
# Globals: LOGIN_TOKEN_PATH, USERNAME_PATH, PASSWORD_PATH, COOKIE_JAR,
#          ENDPOINT, LOGIN_TOKEN (read); USERNAME, PASSWORD (written)
# Returns: 0 once logged in; exits the script with status 1 otherwise.
login() {
	local body result
	local attempts_left=2

	# -s instead of -f: an empty token file left by a failed request must
	# not be mistaken for a cached token.
	if [ ! -s "${LOGIN_TOKEN_PATH}" ]
	then
		requestLoginToken > "${LOGIN_TOKEN_PATH}"
	fi
	readLoginToken

	if [ ! -f "${USERNAME_PATH}" ] || [ ! -f "${PASSWORD_PATH}" ]
	then
		echo "What wiki account to use?"
		# -r keeps backslashes in the credentials intact.
		read -r -p 'Username: ' USERNAME
		read -r -s -p 'Password: ' PASSWORD
		# The silent read does not echo the user's newline.
		echo
	else
		USERNAME="$(cat "${USERNAME_PATH}")"
		PASSWORD="$(cat "${PASSWORD_PATH}")"
	fi

	while [ "${attempts_left}" -gt 0 ]
	do
		attempts_left=$((attempts_left - 1))

		body=$(curl -X POST -d 'action=login' \
			--data-urlencode "lgname=${USERNAME}" \
			--data-urlencode "lgpassword=${PASSWORD}" \
			-d "lgtoken=${LOGIN_TOKEN}" -d 'format=xml' \
			-b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}") || {
			echo 'Login request failed.' >&2
			exit 1
		}
		# A response without a result attribute ends up in the catch-all
		# branch below.
		result=$(printf '%s\n' "${body}" \
			| xmlstarlet sel -t -v '//login/@result' -) || result=''

		case "${result}" in
		NeedToken)
			# The token was rejected: fetch a fresh one and let the loop
			# retry instead of returning as if the login had succeeded.
			requestLoginToken > "${LOGIN_TOKEN_PATH}"
			readLoginToken
			;;
		Success)
			(
				# Newly created credential files are readable by the
				# owner only.
				umask 077
				printf '%s\n' "${USERNAME}" > "${USERNAME_PATH}"
				printf '%s\n' "${PASSWORD}" > "${PASSWORD_PATH}"
			)
			echo 'Logged in.'
			return 0
			;;
		Failed)
			echo 'Login failed. Wrong credentials?' >&2
			exit 1
			;;
		*)
			echo "Unknown login result: ${result}. Exiting." >&2
			exit 1
			;;
		esac
	done

	echo 'Login failed: the server kept requesting a new token.' >&2
	exit 1
}
# Refuse to run without a directory to upload; checked before any
# credentials are sent.
if [ -z "${SOURCE}" ]
then
	echo 'Upload files to Mediawiki.' >&2
	echo 'Usage: mediawiki_post.sh <directory>' >&2
	exit 1
fi
if [ ! -d "${SOURCE}" ]
then
	echo "Not a directory: ${SOURCE}" >&2
	exit 1
fi
# Authenticate, then store the CSRF token in CSRF_TOKEN_PATH and load it
# back into CSRF_TOKEN.
login
requestCSRFToken > "${CSRF_TOKEN_PATH}"
readCSRFToken

# Uploading starts right away unless a restart path is configured, in which
# case files are skipped until that path has been seen.
START_OFFSET_REACHED=1
if [ -n "${RESTART_AFTER_PATH}" ]
then
	START_OFFSET_REACHED=0
fi

# Rate limiter state: uploads counted so far and the clock minute they
# are counted in.
MINUTE_UPLOADS=0
MINUTE="$(date +%M)"
# Upload every *.tab file below SOURCE as the wiki page
# "<NAMESPACE>:<path relative to SOURCE>", keeping to at most
# UPLOAD_MAX_RATE successful uploads per clock minute.
# NOTE(review): find does not promise a stable listing order, so
# RESTART_AFTER_PATH is only meaningful while the tree is unchanged - confirm.
while IFS= read -r -d '' -u 9
do
	# Prepare path
	URI_PATH=${NAMESPACE}:$(realpath --relative-to="${SOURCE}" "${REPLY}")

	# Check start offset: skip everything up to and including the restart path.
	if [ "${START_OFFSET_REACHED}" -eq 0 ]
	then
		# Quoted so that paths containing spaces still compare correctly.
		if [ "${URI_PATH}" == "${NAMESPACE}:${RESTART_AFTER_PATH}" ]
		then
			START_OFFSET_REACHED=1
		fi
		continue
	fi

	# Upload
	BODY=$(curl -X POST -d 'action=edit' --data-urlencode "title=${URI_PATH}" \
		--data-urlencode "text@${REPLY}" -d "token=${CSRF_TOKEN}" \
		-d 'format=xml' -d 'bot=1' \
		-b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}")
	# xmlstarlet exits non-zero when the attribute is missing (e.g. an API
	# error response); fall through to the diagnostic branch instead of
	# letting `set -e` end the script silently.
	RESULT=$(printf '%s\n' "${BODY}" \
		| xmlstarlet sel -t -v '/api/edit/@result' -) || RESULT=''

	case "${RESULT}" in
	Success)
		# One clock reading, so minute and second cannot straddle a
		# minute boundary.
		NOW=$(date '+%M %S')
		MINUTE_NOW=${NOW% *}
		SECOND_NOW=${NOW#* }

		# Both values are two-digit %M strings, so a string comparison
		# avoids any octal interpretation of "08"/"09".
		if [ "${MINUTE_NOW}" != "${MINUTE}" ]
		then
			MINUTE=${MINUTE_NOW}
			MINUTE_UPLOADS=0
		fi
		# Count this upload in the minute it finished in.
		MINUTE_UPLOADS=$((MINUTE_UPLOADS + 1))

		if [ "${MINUTE_UPLOADS}" -ge "${UPLOAD_MAX_RATE}" ]
		then
			# 10# forces base 10: a bare "08" or "09" is an invalid
			# octal constant in shell arithmetic.
			sleep $((60 - 10#${SECOND_NOW}))
			MINUTE=$(date +%M)
			MINUTE_UPLOADS=0
		fi
		echo "Everything went right. Continue…"
		;;
	*)
		echo "Unknown code: ${RESULT}. Exiting." >&2
		# Show the raw response so the failure can be diagnosed.
		printf '%s\n' "${BODY}" >&2
		exit 1
		;;
	esac
done 9< <( find "${SOURCE}" -type f -name '*.tab' -print0 )