#!/bin/bash
# eccc_to_commons - Batch convert Environment and Climate Change Canada
# historical XML data into a JSON format suitable for
# Wikimedia Commons.
# Copyright (C) 2019-2020 Pierre Choffet
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Abort as soon as a command fails.
set -e

# Positional arguments:
#   $1 - file or folder searched recursively for *.xml files
#   $2 - existing folder receiving the generated files
SOURCE="${1}"
DESTINATION="${2}"

# Both arguments are mandatory and the destination folder must already exist.
if [[ -z "${SOURCE}" || -z "${DESTINATION}" || ! -d "${DESTINATION}" ]]
then
	# This is an error path, so the help text goes to stderr.
	{
		echo 'Convert XMLs provided by Environment and Climate Change Canada to Wikimedia Commons JSON'
		echo 'Usage: eccc_to_commons.sh <source> <destination>'
	} >&2
	exit 1
fi
# Make the conversion pipeline at the end of the loop report a failing
# xmlstarlet instead of only the exit status of jq.
set -o pipefail

#######################################
# Print the node counts used to tell the supported file layouts apart.
# Arguments: $1 - path of the XML file
# Outputs:   one comma separated line on stdout: number of stationdata
#            elements, number of stationdata elements carrying a year,
#            month, day, hour and minute attribute respectively, then
#            number of month elements. Nothing when xmlstarlet fails.
# Returns:   always 0
#######################################
count_xml_nodes() {
	local -a node_sets=(
		'//stationdata'
		'//stationdata[@year]'
		'//stationdata[@month]'
		'//stationdata[@day]'
		'//stationdata[@hour]'
		'//stationdata[@minute]'
		'//month'
	)
	# XPath fragment inserted between two counts: a literal ',' argument.
	local separator=",',',"
	local node_set xpath=''

	for node_set in "${node_sets[@]}"
	do
		xpath+="${xpath:+${separator}}count(${node_set})"
	done

	# A single concat() lets the document be parsed once for all counts.
	# A failure is deliberately not fatal here: the caller treats missing
	# counts as 0 and reports the file as having an undetectable type.
	xmlstarlet sel -t -v "concat(${xpath})" "${1}" || true
}

#######################################
# Print the climate id a data file belongs to.
# The id is a 7 character [A-Z0-9] token taken from the name of the folder
# holding the file or, failing that, from the file name stripped of its
# last extension.
# Arguments: $1 - path of the XML file
# Outputs:   the climate id on stdout
# Returns:   0 when an id was found, 1 otherwise
#######################################
detect_climate_id() {
	local path="${1}"
	local last_dir filename

	# The inner substitution is quoted so folders with spaces are handled.
	last_dir=$(basename "$(dirname "${path}")")
	filename=$(basename "${path%.*}")

	if [[ "${last_dir}" =~ ^[A-Z0-9]{7}$ ]]
	then
		# Climate id is in last directory name
		printf '%s\n' "${last_dir}"
	elif [[ "${filename}" =~ ^[A-Z0-9]{7}$ ]]
	then
		# Climate id is in file name
		printf '%s\n' "${filename}"
	else
		return 1
	fi
}

#######################################
# Classify a file from its node counts.
# Arguments: $1 - stationdata elements
#            $2..$6 - stationdata elements with a year, month, day, hour,
#                     minute attribute
#            $7 - month elements
#            Missing or empty arguments count as 0.
# Outputs:   hourly, daily, monthly or almanac on stdout
# Returns:   0 when the layout was recognised, 1 otherwise
#######################################
detect_data_type() {
	local -i stationdata=${1:-0} year=${2:-0} month=${3:-0} day=${4:-0}
	local -i hour=${5:-0} minute=${6:-0} almanac_month=${7:-0}

	if (( almanac_month == 0 && stationdata > 0 && year == stationdata && month == stationdata ))
	then
		# Every stationdata element is dated at least to the month; the finest
		# attribute present on all of them gives the resolution.
		if (( day == stationdata && hour == stationdata && minute == stationdata ))
		then
			echo 'hourly'
			return 0
		elif (( day == stationdata && hour == 0 && minute == 0 ))
		then
			echo 'daily'
			return 0
		elif (( day == 0 && hour == 0 && minute == 0 ))
		then
			echo 'monthly'
			return 0
		fi
	elif (( stationdata == 0 && almanac_month > 0 ))
	then
		echo 'almanac'
		return 0
	fi

	return 1
}

# Loop on xml files in source folder. The list is read from descriptor 9 so
# that commands run in the loop body keep the script's standard input.
while IFS= read -r -d '' -u 9 xml_file
do
	# Check XML type
	IFS=',' read -r -a counts <<< "$(count_xml_nodes "${xml_file}")"

	# Detect climate id
	if ! CLIMATE_ID=$(detect_climate_id "${xml_file}")
	then
		echo "${xml_file}: Cannot detect climate id" >&2
		exit 1
	fi

	data_type=$(detect_data_type "${counts[@]}") || data_type=''
	case "${data_type}" in
		hourly)
			echo "${xml_file}: Hourly data not compatible yet. Ignoring." >&2
			continue
			;;
		daily)
			echo "${xml_file}: Daily data not compatible yet. Ignoring." >&2
			continue
			;;
		monthly)
			STYLESHEET_PATH='monthly_to_commons.xslt'
			DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Monthly/${CLIMATE_ID}.tab"
			;;
		almanac)
			STYLESHEET_PATH='almanac_to_commons.xslt'
			DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Almanac/${CLIMATE_ID}.tab"
			;;
		*)
			echo "${xml_file}: Cannot detect file type. Exiting." >&2
			exit 1
			;;
	esac

	echo "Processing ${xml_file} to ${DESTINATION_PATH}…" >&2
	mkdir -p "$(dirname "${DESTINATION_PATH}")"

	# The filter is spelled out ("jq .") because older jq releases reject an
	# invocation without one. With pipefail set above, a failing transformation
	# is caught here rather than leaving an empty file behind.
	if ! xmlstarlet tr "${STYLESHEET_PATH}" "${xml_file}" | jq . > "${DESTINATION_PATH}"
	then
		rm -f -- "${DESTINATION_PATH}"
		echo "${xml_file}: Conversion failed. Exiting." >&2
		exit 1
	fi
done 9< <( find "${SOURCE}" -type f -name '*.xml' -print0 )