]> Wikimedia Canada | Git repositories - eccc_to_commons.git/blobdiff - eccc_merger.sh
Add merge Bash script
[eccc_to_commons.git] / eccc_merger.sh
diff --git a/eccc_merger.sh b/eccc_merger.sh
new file mode 100755 (executable)
index 0000000..95b1682
--- /dev/null
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+# eccc_merger.sh - Merge Environment and Climate Change Canada historical data
+# Copyright (C) 2020, 2021  Pierre Choffet
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+set -o errexit -o xtrace  # stop at the first failing command, trace each command
+
+# User parameters: all arguments flattened into one space-separated string
+PATHS="$*"
+
+# Print the help text on standard output, then exit with the print's status.
+usage() {
+       printf '%s\n' \
+               'Merge XMLs provided by Environment and Climate change Canada' \
+               'Usage: eccc_merger.sh <ECCC XML path> <ECCC XML path> [ECCC XML path […]]' \
+               '' \
+               'At least two XML files must be provided for them to be merged (obviously…).' \
+               'Subsequent files will be merged as well, result is written to standard' \
+               'output.' \
+               'All XMLs must be valid to commons_rules.xsd and be same type (monthly or' \
+               'almanach)'
+       exit
+}
+
+# Validate arguments, then auto detect input types
+if [ "${#}" -lt 2 ]; then
+       ( usage ) >&2 || true  # subshell: usage's own exit must not end the script
+       exit 1
+fi
+MERGE_TYPE=''
+for path in "${@}"; do  # quoted so that paths with spaces or globs stay intact
+       if [ ! -f "${path}" ]; then
+               echo "File ${path} doesn't exist" >&2
+               exit 1
+       fi
+       # Prints "almanach", "monthly" or nothing when neither element is present
+       FILE_TYPE=$(xmlstarlet sel -t -i '/climatedata/month' -o almanach --elif '/climatedata/stationdata' -o monthly --else -o ''  "${path}")
+       if [ -z "${FILE_TYPE}" ]; then
+               echo "File ${path} is neither monthly nor almanach data" >&2
+               exit 1
+       fi
+       # The first file sets the expected type; every later file must match it
+       if [ -z "${MERGE_TYPE}" ]; then
+               MERGE_TYPE="${FILE_TYPE}"
+       elif [ "${FILE_TYPE}" != "${MERGE_TYPE}" ]; then
+               echo 'All XMLs must be the same type' >&2
+               exit 1
+       fi
+done
+
+# Select stylesheet to be used; errors go to stderr since stdout carries the XML
+case "${MERGE_TYPE}" in
+       almanach) STYLESHEET='eccc_merger_almanach.xslt' ;;
+       monthly)
+               echo 'Not working with monthly data for now. Exiting.' >&2
+               exit 1 ;;
+       *)
+               echo "Unknown XML type '${MERGE_TYPE}': no stylesheet to merge with" >&2
+               exit 1 ;;
+esac
+
+# Merge: fold each further file into the first one, one xmlstarlet pass per file.
+# Every intermediate result lives in one temporary directory that the EXIT trap
+# removes, so a failing pass that aborts the script leaves no stray file behind.
+WORK_DIR="$(mktemp -d)"
+trap 'rm -rf -- "${WORK_DIR}"' EXIT
+FIRST_PATH=''
+PASS=0
+for path in "${@}"; do  # "${@}", not ${PATHS}: keeps paths with spaces whole
+       if [ -z "${FIRST_PATH}" ]; then
+               FIRST_PATH="${WORK_DIR}/0"
+               cp -- "${path}" "${FIRST_PATH}"
+               continue
+       fi
+       PASS=$((PASS + 1))
+       OUT_PATH="${WORK_DIR}/${PASS}"
+       xmlstarlet tr "${STYLESHEET}" -s "merge-path=${path}" "${FIRST_PATH}" > "${OUT_PATH}"
+       rm -- "${FIRST_PATH}"
+       FIRST_PATH="${OUT_PATH}"
+done
+
+# With a single input this prints that file unchanged
+cat -- "${FIRST_PATH}"