From ea933e3293cb98e64656291c69d8f098104b8256 Mon Sep 17 00:00:00 2001 From: Pierre Choffet Date: Thu, 6 Feb 2020 15:49:57 -0500 Subject: [PATCH] Add script to perform the upload It's a generic Mediawiki batch upload script that takes files in a directory (recursively) and uploads them to a Mediawiki instance while keeping their directory tree. Also add login_password to .gitignore to actively prevent contributors from shooting themselves in the foot. --- .gitignore | 1 + README | 10 +++- mediawiki_post.sh | 128 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100755 mediawiki_post.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..36329c7 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +login_password diff --git a/README b/README index ee13ba5..f6c6fd4 100644 --- a/README +++ b/README @@ -25,6 +25,7 @@ commons_rules.xsd validate ECCC XML from a Wikimedian point of view eccc_to_commons.sh transform ECCC XML files into JSON monthly_to_commons.xslt transform ECCC monthly XML file into JSON almanac_to_commons.xslt transform ECCC almanac XML file into JSON +mediawiki_post.sh upload directory to a Mediawiki Usage: @@ -145,4 +146,11 @@ conversion. 5. Upload to destination -Not done yet. +It's now time to share our work with the world and that's the purpose of the +mediawiki_post.sh script. + + $ ./mediawiki_post.sh "${COMMONS_CACHE}" + +It takes the commons cache as its parameter: its file hierarchy will be replicated +on commons. On first run, it will ask for the credentials of the Mediawiki account to use to +perform the import. 
diff --git a/mediawiki_post.sh b/mediawiki_post.sh
new file mode 100755
index 0000000..b0d0127
--- /dev/null
+++ b/mediawiki_post.sh
@@ -0,0 +1,187 @@
+#!/bin/bash
+
+# mediawiki_post - Recursively send files in a directory to a Mediawiki instance
+# Copyright (C) 2020 Pierre Choffet
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# Usage: mediawiki_post.sh <directory>
+#
+# Every regular file found below <directory> is posted as the wiki page
+# "NAMESPACE:<path of the file relative to directory>" through ENDPOINT.
+# The login token, CSRF token, cookies and (after a successful login) the
+# credentials are stored in the files named by the constants below, relative
+# to the current working directory.
+
+# No "set -x" here: tracing would print the password and the tokens.
+set -euo pipefail
+
+readonly ENDPOINT='https://commons.wikimedia.org/w/api.php'
+readonly NAMESPACE='Data'
+
+readonly USERNAME_PATH='login_username'
+readonly PASSWORD_PATH='login_password'
+readonly LOGIN_TOKEN_PATH='login_token'
+readonly CSRF_TOKEN_PATH='csrf_token'
+readonly COOKIE_JAR='cookie_jar'
+
+readonly SOURCE="${1:-}"
+
+# Print the arguments on stderr and abort the script.
+die() {
+  printf '%s\n' "$*" >&2
+  exit 1
+}
+
+# POST a request to ENDPOINT and print the XML answer on stdout.
+# Arguments: extra curl options (the request fields). stdin is passed to curl.
+apiPost() {
+  curl --silent --show-error --fail -X POST "$@" -d 'format=xml' \
+    -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}"
+}
+
+# Print the value selected by XPath $1 in the XML document read from stdin.
+# Prints nothing when nothing matches, so callers test for an empty value
+# instead of being killed by "set -e" on xmlstarlet's non-zero status.
+xmlValue() {
+  xmlstarlet sel -t -v "${1}" - || true
+}
+
+# Load the stored login token into LOGIN_TOKEN (raw; encoded when sent).
+readLoginToken() {
+  LOGIN_TOKEN=$(<"${LOGIN_TOKEN_PATH}")
+}
+
+# Load the stored CSRF token into CSRF_TOKEN (raw; encoded when sent).
+readCSRFToken() {
+  CSRF_TOKEN=$(<"${CSRF_TOKEN_PATH}")
+}
+
+# Ask the API for a login token and print it on stdout.
+requestLoginToken() {
+  local body token
+  body=$(apiPost -d 'action=query' -d 'meta=tokens' -d 'type=login') \
+    || die 'Unable to request a login token.'
+  token=$(xmlValue '//tokens/@logintoken' <<<"${body}")
+  [[ -n "${token}" ]] || die 'No login token in the API answer.'
+  printf '%s\n' "${token}"
+}
+
+# Ask the API for a CSRF token and print it on stdout.
+requestCSRFToken() {
+  local body token
+  body=$(apiPost -d 'action=query' -d 'meta=tokens') \
+    || die 'Unable to request a CSRF token.'
+  token=$(xmlValue '//tokens/@csrftoken' <<<"${body}")
+  [[ -n "${token}" ]] || die 'No CSRF token in the API answer.'
+  printf '%s\n' "${token}"
+}
+
+# Run the token request function $1 and store its output in file $2. The file
+# is only written once the request succeeded, so a failure never leaves an
+# empty token file behind.
+storeToken() {
+  local token
+  token=$("${1}") || exit 1
+  printf '%s\n' "${token}" > "${2}"
+}
+
+# Log in, prompting for credentials when they are not stored yet.
+# Globals: writes LOGIN_TOKEN; reads and writes the login_* files.
+# Exits the script when the login is refused.
+login() {
+  local user password body result attempt
+
+  if [[ ! -f "${LOGIN_TOKEN_PATH}" ]]; then
+    storeToken requestLoginToken "${LOGIN_TOKEN_PATH}"
+  fi
+  readLoginToken
+
+  if [[ -f "${USERNAME_PATH}" && -f "${PASSWORD_PATH}" ]]; then
+    user=$(<"${USERNAME_PATH}")
+    password=$(<"${PASSWORD_PATH}")
+  else
+    echo 'What wiki account to use?'
+    IFS= read -r -p 'Username: ' user
+    IFS= read -r -s -p 'Password: ' password
+    echo
+  fi
+
+  for attempt in 1 2; do
+    # The password goes through stdin so it never shows up in the process list.
+    body=$(printf '%s' "${password}" | apiPost -d 'action=login' \
+      --data-urlencode "lgname=${user}" \
+      --data-urlencode 'lgpassword@-' \
+      --data-urlencode "lgtoken=${LOGIN_TOKEN}") \
+      || die 'Login request failed.'
+    result=$(xmlValue '//login/@result' <<<"${body}")
+
+    case "${result}" in
+      NeedToken | WrongToken)
+        # The stored token is stale: fetch a new one and try once more.
+        storeToken requestLoginToken "${LOGIN_TOKEN_PATH}"
+        readLoginToken
+        ;;
+      Success)
+        printf '%s\n' "${user}" > "${USERNAME_PATH}"
+        # Keep the stored password readable by its owner only.
+        (umask 077; printf '%s\n' "${password}" > "${PASSWORD_PATH}")
+        chmod 600 "${PASSWORD_PATH}"
+        echo 'Logged in.'
+        return 0
+        ;;
+      Failed)
+        die 'Login failed. Wrong credentials?'
+        ;;
+      *)
+        die "Unknown login result: ${result}. Exiting."
+        ;;
+    esac
+  done
+
+  die "Login failed: token still rejected after ${attempt} attempts."
+}
+
+if [[ -z "${SOURCE}" ]]; then
+  echo 'Upload files to Mediawiki.'
+  echo 'Usage: mediawiki_post.sh <directory>'
+  exit 1
+fi
+[[ -d "${SOURCE}" ]] || die "Not a directory: ${SOURCE}"
+
+login
+
+storeToken requestCSRFToken "${CSRF_TOKEN_PATH}"
+readCSRFToken
+
+while IFS= read -r -d '' -u 9 FILE; do
+  URI_PATH="${NAMESPACE}:$(realpath --relative-to="${SOURCE}" "${FILE}")"
+  # The page text is read from stdin, so any file name is safe to upload.
+  BODY=$(apiPost -d 'action=edit' --data-urlencode "title=${URI_PATH}" \
+    --data-urlencode 'text@-' \
+    --data-urlencode "token=${CSRF_TOKEN}" < "${FILE}") \
+    || die "Upload request failed for ${FILE}."
+  RESULT=$(xmlValue '/api/edit/@result' <<<"${BODY}")
+
+  case "${RESULT}" in
+    Success)
+      echo "Everything went right. Continue…"
+      ;;
+    *)
+      die "Unknown code: ${RESULT}. Exiting."
+      ;;
+  esac
+done 9< <(find "${SOURCE}" -type f -print0)
+
+exit 0
-- 
2.42.0