# Excerpt of mediawiki_post.sh in its post-change state, rebuilt from the
# flattened diff identified by:
#   X-Git-Url: https://git.wikimedia.ca/?p=eccc_to_commons.git;a=blobdiff_plain;f=mediawiki_post.sh;h=1f0d382c2f02dfe3f91d6edf074bbf9be549e8ee;hp=b0d0127dea98f366acf7258281de918998b2e93f;hb=refs%2Fheads%2Fmaster;hpb=ea933e3293cb98e64656291c69d8f098104b8256
#   diff --git a/mediawiki_post.sh b/mediawiki_post.sh
#   index b0d0127..1f0d382 100755
#
# Only the two regions touched by that change are visible: the configuration
# constants (around line 21) and the upload section (line 111 to end of file).
# Compared with the parent revision, this revision:
#   - adds UPLOAD_MAX_RATE and RESTART_AFTER_PATH;
#   - skips files up to and including RESTART_AFTER_PATH;
#   - sends 'bot=1' with every edit request;
#   - throttles successful uploads per clock minute;
#   - no longer runs 'exit 0' after the first file;
#   - restricts find to '*.tab' files.
# 'set -o pipefail' is in effect from earlier in the script.

ENDPOINT='https://commons.wikimedia.org/w/api.php'
NAMESPACE='Data'
UPLOAD_MAX_RATE=15 # Successful uploads allowed per clock minute
RESTART_AFTER_PATH='' # e.g. weather.gc.ca/Monthly/1013241.tab; empty = start from the first file
USERNAME_PATH='login_username'
PASSWORD_PATH='login_password'

# [...] Lines not visible in this excerpt. The code below uses SOURCE,
# COOKIE_JAR, CSRF_TOKEN_PATH, CSRF_TOKEN and the helpers login,
# requestCSRFToken and readCSRFToken, none of which are defined in the visible
# part; presumably they are defined in the omitted lines.

login
requestCSRFToken > "${CSRF_TOKEN_PATH}"
readCSRFToken

# Prepare start offset: with no restart path configured, upload from the
# very first file; otherwise skip files until the restart path has been seen.
if [ "${RESTART_AFTER_PATH}" == '' ]
then
  START_OFFSET_REACHED=1
else
  START_OFFSET_REACHED=0
fi

# Rate limiter state: the clock minute currently being counted and the
# number of successful uploads recorded in it.
MINUTE=$(date +%M)
MINUTE_UPLOADS=0

# NOTE(review): resuming relies on find listing the files in the same order
# as the interrupted run; find makes no ordering promise, so consider piping
# its output through 'sort -z' if resumes turn out to be unreliable.
while IFS= read -r -d '' -u 9
do
  # Prepare path: page title is "<NAMESPACE>:<path relative to SOURCE>"
  URI_PATH=${NAMESPACE}:$(realpath --relative-to="${SOURCE}" "${REPLY}")

  # Check start offset
  if [ "${START_OFFSET_REACHED}" -eq 0 ]
  then
    # Quoted so that paths with whitespace or glob characters compare
    # correctly instead of breaking the test.
    if [ "${URI_PATH}" == "${NAMESPACE}:${RESTART_AFTER_PATH}" ]
    then
      START_OFFSET_REACHED=1
    fi

    # The restart file itself is skipped too: uploads resume *after* it.
    continue
  fi

  # Upload
  BODY=$(curl -X POST -d 'action=edit' --data-urlencode "title=${URI_PATH}" --data-urlencode "text@${REPLY}" -d "token=${CSRF_TOKEN}" -d 'format=xml' -d 'bot=1' -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}")
  RESULT=$(echo "${BODY}" | xmlstarlet sel -t -v '/api/edit/@result' -)

  case "${RESULT}" in
    Success)
      # Read minute and second from a single clock sample so the two
      # values cannot straddle a minute boundary.
      NOW=$(date +%M:%S)
      MINUTE_NOW=${NOW%:*}
      SECOND_NOW=${NOW#*:}

      if [ "${MINUTE_NOW}" != "${MINUTE}" ]
      then
        # A new minute started: this upload is the first one counted in it.
        MINUTE=${MINUTE_NOW}
        MINUTE_UPLOADS=1
      else
        MINUTE_UPLOADS=$((MINUTE_UPLOADS+1))
      fi

      if [ "${MINUTE_UPLOADS}" -ge "${UPLOAD_MAX_RATE}" ]
      then
        # Quota for this minute is used up: wait for the next minute.
        # date pads seconds with a zero and bash arithmetic treats a
        # leading 0 as octal, so "08"/"09" must be forced to base 10.
        sleep $((60 - 10#${SECOND_NOW}))
        MINUTE=$(date +%M)
        MINUTE_UPLOADS=0
      fi

      echo "Everything went right. Continue…"
      ;;
    *)
      # Any other result (including an empty one) aborts the run.
      echo "Unknown code: ${RESULT}. Exiting."
      exit 1
      ;;
  esac
done 9< <( find "${SOURCE}" -type f -name '*.tab' -print0 )

# A restart path that never matched means every file was skipped; report it
# instead of finishing silently with a success status.
if [ "${START_OFFSET_REACHED}" -eq 0 ]
then
  echo "Restart path '${RESTART_AFTER_PATH}' was not found under '${SOURCE}'; nothing was uploaded." >&2
  exit 1
fi