#!/bin/bash

# eccc_merger.sh - Merge Environment and Climate change Canada historical data
# Copyright (C) 2020, 2021 Pierre Choffet
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
# Recovered from the gitweb blobdiff at:
#   https://git.wikimedia.ca/?p=eccc_to_commons.git;a=blobdiff_plain;f=eccc_merger.sh;hb=205649e59339883769b0bc664fee1256d5816350
#
# Synopsis:
#   eccc_merger.sh FIRST.xml SECOND.xml [MORE.xml ...] > merged.xml
#
# The first file is merged with the second, the result with the third, and so
# on. Each step runs an XSLT stylesheet through xmlstarlet, passing the next
# file's path as the "merge-path" stylesheet parameter. The final document is
# written to standard output; every diagnostic goes to standard error.
#
# The stylesheets are referenced by bare relative name, so they are looked up
# from the current working directory.

# -e: stop on the first failing command; -u: reject unset variables;
# -x: trace commands on stderr (kept from the original script).
set -eux -o pipefail

# User parameters, kept as an array so paths containing whitespace or glob
# characters survive intact.
PATHS=("$@")

#######################################
# Print the help text on standard output and terminate the script.
# Arguments: $1 - exit status to terminate with (optional, defaults to 0)
#######################################
usage() {
  cat <<'EOF'
Merge XMLs provided by Environment and Climate change Canada
Usage: eccc_merger.sh [ECCC XML path […]]

At least two XML files must be provided for them to be merged (obviously…).
Subsequent files will be merged as well, result is written to standard
output.
All XMLs must be valid to commons_rules.xsd and be same type (monthly or
almanach)
EOF
  exit "${1:-0}"
}

# Explicit help request.
if [[ "${1:-}" == '-h' || "${1:-}" == '--help' ]]; then
  usage
fi

# A merge needs at least two inputs; anything less is a usage error.
if (( ${#PATHS[@]} < 2 )); then
  usage 1 >&2
fi

# Auto detect input types
MERGE_TYPE=''
for path in "${PATHS[@]}"; do
  # Check file exists
  if [[ ! -f "${path}" ]]; then
    echo "File ${path} doesn't exist" >&2
    exit 1
  fi

  # Classify the document by its root content: a /climatedata/month element
  # marks an almanach file, /climatedata/stationdata a monthly one. A failing
  # xmlstarlet run (e.g. unparsable XML) is treated as "unrecognised" so the
  # user gets an explicit message instead of a silent exit under set -e.
  FILE_TYPE="$(xmlstarlet sel -t \
    -i '/climatedata/month' -o almanach \
    --elif '/climatedata/stationdata' -o monthly \
    --else -o '' "${path}")" || FILE_TYPE=''

  # An empty type must be rejected here: otherwise it is indistinguishable
  # from "type not detected yet" and a later file would silently define it.
  if [[ -z "${FILE_TYPE}" ]]; then
    echo "File ${path} is neither a monthly nor an almanach ECCC XML" >&2
    exit 1
  fi

  if [[ -z "${MERGE_TYPE}" ]]; then
    MERGE_TYPE="${FILE_TYPE}"
  elif [[ "${FILE_TYPE}" != "${MERGE_TYPE}" ]]; then
    echo 'All XMLs must be the same type' >&2
    exit 1
  fi
done

# Select stylesheet to be used
case "${MERGE_TYPE}" in
  almanach)
    STYLESHEET='eccc_merger_almanach.xslt'
    ;;
  monthly)
    # The monthly stylesheet (eccc_merger_monthly.xslt) is not usable yet.
    # Report on stderr with a failing status: stdout is reserved for the
    # merged XML and nothing has been merged.
    echo 'Not working with monthly data for now. Exiting.' >&2
    exit 1
    ;;
  *)
    # Unreachable as long as the detection loop rejects unknown types.
    echo "Unsupported merge type: ${MERGE_TYPE}" >&2
    exit 1
    ;;
esac

# All intermediate results live in one private directory that is removed on
# every exit path, including failures caught by set -e.
TMP_DIR=''
cleanup() {
  if [[ -n "${TMP_DIR}" ]]; then
    rm -rf -- "${TMP_DIR}"
  fi
}
trap cleanup EXIT
TMP_DIR="$(mktemp -d)"

# Merge
# Work on a copy of the first input so the accumulated document is always a
# temporary file that is safe to delete after each step.
STEP=0
MERGED_PATH="${TMP_DIR}/merged_${STEP}.xml"
cp -- "${PATHS[0]}" "${MERGED_PATH}"

for path in "${PATHS[@]:1}"; do
  STEP=$((STEP + 1))
  OUT_PATH="${TMP_DIR}/merged_${STEP}.xml"

  # Fold the next input into the accumulated document.
  xmlstarlet tr "${STYLESHEET}" -s "merge-path=${path}" "${MERGED_PATH}" > "${OUT_PATH}"

  # The previous intermediate result is no longer needed.
  rm -- "${MERGED_PATH}"
  MERGED_PATH="${OUT_PATH}"
done

# Emit the final merged document; the EXIT trap removes the temporary files.
cat -- "${MERGED_PATH}"