#!/bin/bash
# mediawiki_post - Recursively send files in a directory to a Mediawiki instance
# Copyright (C) 2020 Pierre Choffet
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
# Usage: mediawiki_post.sh <directory>
#
# Every '*.tab' file found below <directory> is posted as the text of the
# page "${NAMESPACE}:<path relative to directory>" through the API at
# ENDPOINT. Requires curl, xmlstarlet, find and a realpath that supports
# --relative-to.
#
# The credential, token and cookie files named in the settings below are
# created in the current working directory.

# Tracing (set -x) is deliberately NOT enabled: it would print the password
# and the tokens that appear on the curl command lines.
set -euo pipefail

# Files created below hold a password, tokens and session cookies.
umask 077

readonly ENDPOINT='https://commons.wikimedia.org/w/api.php'
readonly NAMESPACE='Data'
readonly UPLOAD_MAX_RATE=15 # Per minute
# When non-empty, files are skipped up to and including this one.
readonly RESTART_AFTER_PATH='' # e.g. weather.gc.ca/Monthly/1013241.tab

readonly USERNAME_PATH='login_username'
readonly PASSWORD_PATH='login_password'
readonly LOGIN_TOKEN_PATH='login_token'
readonly CSRF_TOKEN_PATH='csrf_token'
readonly COOKIE_JAR='cookie_jar'

#######################################
# Print a message on stderr and terminate the script with status 1.
# Arguments: message words
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# POST a form to ENDPOINT with format=xml, sharing cookies via COOKIE_JAR.
# Arguments: extra curl data options (-d / --data-urlencode ...)
# Outputs:   response body on stdout
# Returns:   curl's exit status (non-zero on transport or HTTP errors)
#######################################
apiPost() {
  curl --silent --show-error --fail \
    -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" \
    -d 'format=xml' "$@" "${ENDPOINT}"
}

#######################################
# Evaluate an XPath expression against the XML document on stdin.
# xmlstarlet exits non-zero when nothing matches; that case is mapped to
# empty output so callers can report it instead of dying under `set -e`.
# Arguments: $1 - XPath expression
# Outputs:   matched value, or nothing
#######################################
xmlValue() {
  xmlstarlet sel -t -v "$1" - || true
}

#######################################
# Load the stored login token, unencoded, into LOGIN_TOKEN.
# Callers must send it with --data-urlencode.
# Globals: LOGIN_TOKEN_PATH (read), LOGIN_TOKEN (written)
#######################################
readLoginToken() {
  LOGIN_TOKEN=$(<"${LOGIN_TOKEN_PATH}")
}

#######################################
# Load the stored CSRF token, unencoded, into CSRF_TOKEN.
# Callers must send it with --data-urlencode.
# Globals: CSRF_TOKEN_PATH (read), CSRF_TOKEN (written)
#######################################
readCSRFToken() {
  CSRF_TOKEN=$(<"${CSRF_TOKEN_PATH}")
}

#######################################
# Ask the API for a login token.
# Outputs: the token on stdout
# Returns: non-zero if the request fails or the response has no token
#######################################
requestLoginToken() {
  local body token
  body=$(apiPost -d 'action=query' -d 'meta=tokens' -d 'type=login') || return
  token=$(xmlValue '//tokens/@logintoken' <<<"${body}")
  if [[ -z "${token}" ]]; then
    echo 'No login token in API response.' >&2
    return 1
  fi
  printf '%s\n' "${token}"
}

#######################################
# Ask the API for a CSRF token.
# Outputs: the token on stdout
# Returns: non-zero if the request fails or the response has no token
#######################################
requestCSRFToken() {
  local body token
  body=$(apiPost -d 'action=query' -d 'meta=tokens') || return
  token=$(xmlValue '//tokens/@csrftoken' <<<"${body}")
  if [[ -z "${token}" ]]; then
    echo 'No CSRF token in API response.' >&2
    return 1
  fi
  printf '%s\n' "${token}"
}

#######################################
# Log in, prompting for credentials unless both credential files exist.
# On success the credentials are written to USERNAME_PATH / PASSWORD_PATH.
# Globals: USERNAME, PASSWORD, LOGIN_TOKEN (written)
# Exits with status 1 when the login is refused.
#######################################
login() {
  local attempt body result

  # -s rather than -f: an empty file left by a failed request is not a token.
  if [[ ! -s "${LOGIN_TOKEN_PATH}" ]]; then
    requestLoginToken >"${LOGIN_TOKEN_PATH}"
  fi
  readLoginToken

  if [[ ! -f "${USERNAME_PATH}" || ! -f "${PASSWORD_PATH}" ]]; then
    echo "What wiki account to use?"
    # IFS= and -r keep whitespace and backslashes in the input intact.
    IFS= read -r -p 'Username: ' USERNAME
    IFS= read -r -s -p 'Password: ' PASSWORD
    echo >&2 # the silent prompt leaves the cursor on the prompt line
  else
    USERNAME=$(<"${USERNAME_PATH}")
    PASSWORD=$(<"${PASSWORD_PATH}")
  fi

  for attempt in 1 2; do
    body=$(apiPost -d 'action=login' \
      --data-urlencode "lgname=${USERNAME}" \
      --data-urlencode "lgpassword=${PASSWORD}" \
      --data-urlencode "lgtoken=${LOGIN_TOKEN}") || die 'Login request failed.'
    result=$(xmlValue '//login/@result' <<<"${body}")

    case "${result}" in
      NeedToken | WrongToken)
        # The stored token does not match the current session: fetch a new
        # one and try again, instead of carrying on without being logged in.
        requestLoginToken >"${LOGIN_TOKEN_PATH}"
        readLoginToken
        ;;
      Success)
        printf '%s\n' "${USERNAME}" >"${USERNAME_PATH}"
        printf '%s\n' "${PASSWORD}" >"${PASSWORD_PATH}"
        # umask only affects new files; tighten a pre-existing one too.
        chmod 600 "${PASSWORD_PATH}"
        echo 'Logged in.'
        return 0
        ;;
      Failed)
        die 'Login failed. Wrong credentials?'
        ;;
      *)
        die "Unknown login result: ${result}. Exiting."
        ;;
    esac
  done

  die "Login token still rejected after ${attempt} attempts. Exiting."
}

#######################################
# Entry point: log in, then upload every *.tab file below the directory.
# Arguments: $1 - directory to upload
#######################################
main() {
  local -r source_dir="${1:-}"
  local start_offset_reached minute uploads now minute_now second_now
  local file uri_path body result

  if [[ -z "${source_dir}" ]]; then
    echo 'Upload files to Mediawiki.' >&2
    echo 'Usage: mediawiki_post.sh <directory>' >&2
    exit 1
  fi
  [[ -d "${source_dir}" ]] || die "Not a directory: ${source_dir}"

  login

  requestCSRFToken >"${CSRF_TOKEN_PATH}"
  readCSRFToken

  # Prepare start offset
  if [[ -z "${RESTART_AFTER_PATH}" ]]; then
    start_offset_reached=1
  else
    start_offset_reached=0
  fi

  minute=$(date +%M)
  uploads=0

  # The file list arrives on fd 9 so stdin stays free for the commands in
  # the loop body. Resuming relies on find listing the files in the same
  # order as during the interrupted run.
  while IFS= read -r -d '' -u 9 file; do
    # Prepare path
    uri_path="${NAMESPACE}:$(realpath --relative-to="${source_dir}" "${file}")"

    # Check start offset
    if [[ "${start_offset_reached}" -eq 0 ]]; then
      if [[ "${uri_path}" == "${NAMESPACE}:${RESTART_AFTER_PATH}" ]]; then
        start_offset_reached=1
      fi
      continue
    fi

    # Upload
    body=$(apiPost -d 'action=edit' \
      --data-urlencode "title=${uri_path}" \
      --data-urlencode "text@${file}" \
      --data-urlencode "token=${CSRF_TOKEN}" \
      -d 'bot=1') || die "Upload request failed for ${uri_path}."
    result=$(xmlValue '/api/edit/@result' <<<"${body}")

    case "${result}" in
      Success)
        # One date call, so minute and second belong to the same instant.
        now=$(date '+%M %S')
        minute_now=${now% *}
        second_now=${now#* }

        # Reset on minute rollover BEFORE counting, so that this upload is
        # charged to the new minute.
        if [[ "${minute_now}" != "${minute}" ]]; then
          minute=${minute_now}
          uploads=0
        fi
        uploads=$((uploads + 1))

        if ((uploads >= UPLOAD_MAX_RATE)); then
          # 10# forces base 10: "08" and "09" are invalid octal literals.
          sleep $((60 - 10#${second_now}))
          minute=$(date +%M)
          uploads=0
        fi
        echo "Everything went right. Continue…"
        ;;
      *)
        # Show the raw response: it carries the API's own error description.
        printf '%s\n' "${body}" >&2
        die "Unknown code: ${result}. Exiting."
        ;;
    esac
  done 9< <(find "${source_dir}" -type f -name '*.tab' -print0)
}

main "$@"