#!/bin/bash
# eccc_to_commons - Batch convert Environment and Climate change Canada
#                   historical XML data into a JSON format suitable for
#                   Wikimedia Commons.
# Copyright (C) 2019-2020 Pierre Choffet
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see https://www.gnu.org/licenses/.
#
# Usage: eccc_to_commons.sh SOURCE DESTINATION_DIR
#
#   SOURCE           Path searched recursively (with find) for '*.xml' files.
#   DESTINATION_DIR  Existing directory; output is written below
#                    DESTINATION_DIR/weather.gc.ca/{Monthly,Almanac}/.
#
# Each XML file is classified by counting its <stationdata> and <month>
# elements. Monthly and almanac files are transformed with the matching
# XSLT stylesheet and pretty-printed with jq into CLIMATE_ID.tab. Hourly
# and daily files are skipped with a warning. Any other layout aborts.
#
# The stylesheets (monthly_to_commons.xslt, almanac_to_commons.xslt) are
# given as bare relative names, so they are looked up from the current
# working directory.

# pipefail: a failing 'xmlstarlet tr' must not be hidden by a succeeding jq.
set -euo pipefail

SOURCE="${1:-}"
DESTINATION="${2:-}"

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the number of nodes matching an XPath expression in an XML file.
# Arguments: $1 - XPath node-set expression, $2 - XML file path
# Returns:   non-zero (after a diagnostic on stderr) when xmlstarlet did not
#            produce a plain non-negative integer, e.g. on unparsable XML.
count_nodes() {
  local xpath="${1}" file="${2}" result
  # The output, not the exit status, is validated: this keeps the check
  # independent of how 'xmlstarlet sel' reports "nothing selected".
  result=$(xmlstarlet sel -t -v "count(${xpath})" "${file}") || true
  if [[ ! "${result}" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "${file}: Cannot count ${xpath} nodes. Exiting." >&2
    return 1
  fi
  printf '%s\n' "${result}"
}

if [[ -z "${SOURCE}" || -z "${DESTINATION}" || ! -d "${DESTINATION}" ]]; then
  {
    echo 'Convert XMLs provided by Environment and Climate change Canada to Wikimedia Commons JSON'
    echo 'Usage: eccc_to_commons.sh SOURCE DESTINATION_DIR'
  } >&2
  exit 1
fi

# Without this check a missing source only makes find complain inside the
# process substitution and the script would end successfully with no output.
if [[ ! -e "${SOURCE}" ]]; then
  die "${SOURCE}: No such file or directory"
fi

# Loop on xml files in source folder. File descriptor 9 carries the
# NUL-delimited list so that commands inside the loop keep their own stdin.
while IFS= read -r -d '' -u 9 XML_FILE; do
  # Check XML type: gather the element counts used for classification.
  # A failing count_nodes aborts the script through 'set -e'.
  STATIONDATA_COUNT=$(count_nodes '//stationdata' "${XML_FILE}")
  YEAR_COUNT=$(count_nodes '//stationdata[@year]' "${XML_FILE}")
  MONTH_COUNT=$(count_nodes '//stationdata[@month]' "${XML_FILE}")
  DAY_COUNT=$(count_nodes '//stationdata[@day]' "${XML_FILE}")
  HOUR_COUNT=$(count_nodes '//stationdata[@hour]' "${XML_FILE}")
  MINUTE_COUNT=$(count_nodes '//stationdata[@minute]' "${XML_FILE}")
  AL_MONTH_COUNT=$(count_nodes '//month' "${XML_FILE}")

  # Detect climate id: seven characters from [A-Z0-9], taken from the parent
  # directory name first, then from the file name without its extension.
  LAST_DIR=$(basename -- "$(dirname -- "${XML_FILE}")")
  FILENAME=$(basename -- "${XML_FILE%.*}")
  if [[ "${LAST_DIR}" =~ ^[A-Z0-9]{7}$ ]]; then
    # Climate id is in last directory name
    CLIMATE_ID="${LAST_DIR}"
  elif [[ "${FILENAME}" =~ ^[A-Z0-9]{7}$ ]]; then
    # Climate id is in file name
    CLIMATE_ID="${FILENAME}"
  else
    die "${XML_FILE}: Cannot detect climate id"
  fi

  # Classify the file. Station data files have no <month> element and every
  # <stationdata> carries @year and @month; the presence of @day, @hour and
  # @minute on all or none of them tells hourly, daily and monthly apart.
  # Almanac files have <month> elements and no <stationdata>.
  FILE_TYPE='unknown'
  if (( AL_MONTH_COUNT == 0 && STATIONDATA_COUNT > 0 \
        && YEAR_COUNT == STATIONDATA_COUNT \
        && MONTH_COUNT == STATIONDATA_COUNT )); then
    if (( DAY_COUNT == STATIONDATA_COUNT \
          && HOUR_COUNT == STATIONDATA_COUNT \
          && MINUTE_COUNT == STATIONDATA_COUNT )); then
      FILE_TYPE='hourly'
    elif (( DAY_COUNT == STATIONDATA_COUNT \
            && HOUR_COUNT == 0 && MINUTE_COUNT == 0 )); then
      FILE_TYPE='daily'
    elif (( DAY_COUNT == 0 && HOUR_COUNT == 0 && MINUTE_COUNT == 0 )); then
      FILE_TYPE='monthly'
    fi
  elif (( STATIONDATA_COUNT == 0 && AL_MONTH_COUNT > 0 )); then
    FILE_TYPE='almanac'
  fi

  case "${FILE_TYPE}" in
    hourly)
      echo "${XML_FILE}: Hourly data not compatible yet. Ignoring." >&2
      continue
      ;;
    daily)
      echo "${XML_FILE}: Daily data not compatible yet. Ignoring." >&2
      continue
      ;;
    monthly)
      STYLESHEET_PATH='monthly_to_commons.xslt'
      DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Monthly/${CLIMATE_ID}.tab"
      ;;
    almanac)
      # Check file contains data
      AL_DAY_COUNT=$(count_nodes '//day' "${XML_FILE}")
      if (( AL_DAY_COUNT == 0 )); then
        echo "${XML_FILE}: No day found. Ignoring." >&2
        continue
      fi
      STYLESHEET_PATH='almanac_to_commons.xslt'
      DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Almanac/${CLIMATE_ID}.tab"
      ;;
    *)
      die "${XML_FILE}: Cannot detect file type. Exiting."
      ;;
  esac

  echo "Processing ${XML_FILE} to ${DESTINATION_PATH}…" >&2
  mkdir -p -- "$(dirname -- "${DESTINATION_PATH}")"

  # Explicit '.' filter: do not rely on jq guessing a default program.
  # On failure the redirect has already truncated the destination, so the
  # incomplete file is removed rather than left looking like a valid result.
  if ! xmlstarlet tr "${STYLESHEET_PATH}" "${XML_FILE}" \
      | jq . > "${DESTINATION_PATH}"; then
    rm -f -- "${DESTINATION_PATH}"
    die "${XML_FILE}: Conversion failed. Exiting."
  fi
done 9< <(find "${SOURCE}" -type f -name '*.xml' -print0)