Wikimedia Canada | Git repositories - eccc_to_commons.git/blob - eccc_merger.sh
Rewrite almanach merge logic
[eccc_to_commons.git] / eccc_merger.sh
1 #!/bin/bash
2
3 # eccc_merger.sh - Merge Environment and Climate change Canada historical data
4 # Copyright (C) 2020, 2021 Pierre Choffet
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
# Abort the script as soon as any command returns a non-zero status.
set -o errexit

# User parameters
# Every positional argument, flattened into one space-separated string.
PATHS="$*"
23
# Print the command-line help on standard output, then terminate the script.
# The bare `exit` reuses the status of the preceding command, so the script
# ends with 0 whenever the help text was written successfully.
usage() {
	printf '%s\n' \
		'Merge XMLs provided by Environment and Climate change Canada' \
		'Usage: eccc_merger.sh <ECCC XML path> <ECCC XML path> [ECCC XML path […]]' \
		'' \
		'At least two XML files must be provided for them to be merged (obviously…).' \
		'Subsequent files will be merged as well, result is written to standard' \
		'output.' \
		'All XMLs must be valid to commons_rules.xsd and be same type (monthly or' \
		'almanach)'
	exit
}
37
# Validate inputs and auto detect their type
#
# Reads the positional parameters (one XML path each) and sets MERGE_TYPE to
# 'almanach' or 'monthly'. The script stops with status 1 when fewer than two
# paths are given, when a path is not a regular file, when a file is of
# neither known type, or when the files are not all of the same type.

# Merging needs at least two documents; show the help text otherwise.
if [ "$#" -lt 2 ]
then
	# usage ends with a bare `exit` (status 0): run it in a subshell so this
	# error path can still terminate with a failure status.
	( usage ) >&2
	exit 1
fi

MERGE_TYPE=''
# Quoted "$@" keeps paths containing whitespace or glob characters intact.
for path in "$@"
do
	# Check file exists
	if [ ! -f "${path}" ]
	then
		echo "File ${path} doesn't exist" >&2
		exit 1
	fi

	# Check merge type: a /climatedata/month node marks an almanach file, a
	# /climatedata/stationdata node marks a monthly file, anything else
	# yields an empty string.
	FILE_TYPE=$(xmlstarlet sel -t -i '/climatedata/month' -o almanach --elif '/climatedata/stationdata' -o monthly --else -o '' "${path}")

	# An unrecognised document must not be accepted silently, otherwise it
	# would leave MERGE_TYPE empty and let a later file decide the type.
	if [ "${FILE_TYPE}" = '' ]
	then
		echo "File ${path} is neither a monthly nor an almanach XML" >&2
		exit 1
	fi

	if [ "${MERGE_TYPE}" = '' ]
	then
		# First file: it defines the type every other file must match.
		MERGE_TYPE="${FILE_TYPE}"
	elif [ "${FILE_TYPE}" != "${MERGE_TYPE}" ]
	then
		echo 'All XMLs must be the same type' >&2
		exit 1
	fi
done
63
# Select stylesheet to be used
#
# Maps MERGE_TYPE to the XSLT file name stored in STYLESHEET. Diagnostics go
# to standard error and end the script with status 1, because standard output
# is reserved for the merged XML.
# NOTE(review): the stylesheet names are relative, so they are looked up from
# the current working directory - confirm this is intended.
case "${MERGE_TYPE}" in
	almanach)
		STYLESHEET='eccc_merger_almanach.xslt'
		;;
	monthly)
		# Kept for when monthly merging gets supported.
		STYLESHEET='eccc_merger_monthly.xslt'
		echo 'Not working with monthly data for now. Exiting.' >&2
		exit 1
		;;
	*)
		# Without a known type there is no stylesheet to run.
		echo "Unknown XML type '${MERGE_TYPE}', cannot select a stylesheet" >&2
		exit 1
		;;
esac
74
# Merge
#
# Folds every input file into a single document: the first file seeds the
# accumulator, then each following file is merged into it by running
# STYLESHEET with the file path passed as the "merge-path" string parameter.
# The final document is written to standard output.

# With fewer than two inputs there is nothing to merge and no result file.
if [ "$#" -lt 2 ]
then
	echo 'At least two XML files are required to be merged' >&2
	exit 1
fi

# All intermediate files live in one private directory that is removed on any
# exit path, including an abort caused by `set -e`.
WORK_DIR="$(mktemp -d)"
trap 'rm -rf -- "${WORK_DIR}"' EXIT

FIRST_PATH=''
PASS=0
# Iterate over the positional parameters directly: a flattened string would
# be re-split on whitespace and break paths containing spaces.
for path in "$@"
do
	if [ -z "${FIRST_PATH}" ]
	then
		# Work on a copy so the first input is never touched.
		FIRST_PATH="${WORK_DIR}/pass_0.xml"
		cp -- "${path}" "${FIRST_PATH}"
		continue
	fi

	SECOND_PATH="${path}"
	PASS=$((PASS + 1))
	OUT_PATH="${WORK_DIR}/pass_${PASS}.xml"
	xmlstarlet tr "${STYLESHEET}" -s "merge-path=${SECOND_PATH}" "${FIRST_PATH}" > "${OUT_PATH}"

	# The previous accumulator is no longer needed once merged.
	rm -- "${FIRST_PATH}"
	FIRST_PATH="${OUT_PATH}"
done

# FIRST_PATH now designates the result of the last merge pass.
cat -- "${FIRST_PATH}"