Wikimedia Canada | Git repositories - eccc_to_commons.git/blobdiff - mediawiki_post.sh
Rewrite almanach merge logic
[eccc_to_commons.git] / mediawiki_post.sh
index 95c8d56be3d38c63fb184b5d4fc440ba4387caae..1f0d382c2f02dfe3f91d6edf074bbf9be549e8ee 100755 (executable)
@@ -21,6 +21,8 @@ set -o pipefail
 
 ENDPOINT='https://commons.wikimedia.org/w/api.php'
 NAMESPACE='Data'
+UPLOAD_MAX_RATE=15    # Per minute
+RESTART_AFTER_PATH='' # e.g. weather.gc.ca/Monthly/1013241.tab
 
 USERNAME_PATH='login_username'
 PASSWORD_PATH='login_password'
@@ -109,16 +111,55 @@ login
 requestCSRFToken > "${CSRF_TOKEN_PATH}"
 readCSRFToken
 
+# Prepare start offset: an empty RESTART_AFTER_PATH means uploading starts with the first file
+if [ "${RESTART_AFTER_PATH}" == '' ]
+then
+       START_OFFSET_REACHED=1
+else
+       START_OFFSET_REACHED=0
+fi
+
+MINUTE=$(date +%M) # Clock minute the upload counter below refers to
+MINUTE_UPLOADS=0   # Successful uploads counted during that minute
+
 while IFS= read -r -d '' -u 9
 do
+       # Prepare path
        URI_PATH=${NAMESPACE}:$(realpath --relative-to="${SOURCE}" "${REPLY}")
-       BODY=$(curl -X POST -d 'action=edit' --data-urlencode "title=${URI_PATH}" --data-urlencode "text@${REPLY}" -d "token=${CSRF_TOKEN}" -d 'format=xml' -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}")
+
+       # Check start offset (operands quoted so paths with spaces/globs stay a single word)
+       if [ "${START_OFFSET_REACHED}" -eq 0 ]
+       then
+               if [ "${URI_PATH}" == "${NAMESPACE}:${RESTART_AFTER_PATH}" ]
+               then
+                       START_OFFSET_REACHED=1
+               fi
+               # The matching file itself is skipped too: uploading restarts *after* it
+               continue
+       fi
+
+       # Upload
+       BODY=$(curl -X POST -d 'action=edit' --data-urlencode "title=${URI_PATH}" --data-urlencode "text@${REPLY}" -d "token=${CSRF_TOKEN}" -d 'format=xml' -d 'bot=1' -b "${COOKIE_JAR}" -c "${COOKIE_JAR}" "${ENDPOINT}")
        RESULT=$(echo "${BODY}" | xmlstarlet sel -t -v '/api/edit/@result' -)
 
        case "${RESULT}" in
        Success)
+               # Throttle: allow at most UPLOAD_MAX_RATE successful uploads per clock minute
+               SECOND_NOW=$(date +%S)
+               MINUTE_NOW=$(date +%M)
+               MINUTE_UPLOADS=$((MINUTE_UPLOADS+1))
+               if [ "${MINUTE_NOW}" -ne "${MINUTE}" ]
+               then
+                       MINUTE=$(date +%M)
+                       MINUTE_UPLOADS=0
+               elif [ "${MINUTE_UPLOADS}" -eq "${UPLOAD_MAX_RATE}" ]
+               then
+                       sleep $((60-10#${SECOND_NOW})) # 10# forces base 10: "08"/"09" are invalid octal
+                       MINUTE=$(date +%M)
+                       MINUTE_UPLOADS=0
+               fi
+
                echo "Everything went right. Continue…"
-               sleep 1
                ;;
        *)
                echo "Unknown code: ${RESULT}. Exiting."