# Source: Wikimedia Canada | Git repositories - eccc_to_commons.git/blob - eccc_to_commons.sh
# eccc_to_commons - Batch convert Environment and Climate change Canada
#                   historical XML data into a JSON format suitable for
#                   Wikimedia Commons
#
# Copyright (C) 2019-2020 Pierre Choffet
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Abort with a usage message unless a source folder was given and the
# destination folder was given and already exists.
# SOURCE and DESTINATION are assigned outside this block (presumably from the
# command-line arguments; confirm against the lines above).
if [[ -z "${SOURCE:-}" || -z "${DESTINATION:-}" || ! -d "${DESTINATION:-}" ]]
then
  # This is the error path (non-zero exit), so the text goes to stderr.
  echo 'Fix XMLs provided by Environment and Climate change Canada' >&2
  echo 'Usage: eccc_to_commons.sh <source folder> <destination folder>' >&2
  exit 1
fi
# Print the number of nodes matching XPath expression $1 in XML file $2.
eccc_count_nodes()
{
  xmlstarlet sel -t -v "count($1)" "$2"
}

# Print the first argument that looks like a climate id, i.e. that matches
# ^[A-Z0-9]{7}$. Returns 1 (and prints nothing) when no argument matches.
eccc_climate_id()
{
  local candidate

  for candidate in "$@"
  do
    if [[ "${candidate}" =~ ^[A-Z0-9]{7}$ ]]
    then
      printf '%s\n' "${candidate}"
      return 0
    fi
  done

  return 1
}

# Print the kind of file described by a set of element counts.
# Arguments:
#   $1      number of stationdata elements
#   $2..$6  number of stationdata elements carrying a year / month / day /
#           hour / minute attribute
#   $7      number of month elements
# Outputs: one of hourly, daily, monthly, almanac, unknown.
eccc_file_kind()
{
  local -i stationdata=$1 with_year=$2 with_month=$3 with_day=$4
  local -i with_hour=$5 with_minute=$6 al_month=$7

  if (( al_month == 0 && stationdata > 0 &&
        with_year == stationdata && with_month == stationdata ))
  then
    # Every stationdata element is dated at least down to the month; the
    # finer-grained attributes tell the three station data kinds apart.
    if (( with_day == stationdata && with_hour == stationdata &&
          with_minute == stationdata ))
    then
      echo 'hourly'
      return 0
    elif (( with_day == stationdata && with_hour == 0 && with_minute == 0 ))
    then
      echo 'daily'
      return 0
    elif (( with_day == 0 && with_hour == 0 && with_minute == 0 ))
    then
      echo 'monthly'
      return 0
    fi
  elif (( stationdata == 0 && al_month > 0 ))
  then
    echo 'almanac'
    return 0
  fi

  echo 'unknown'
}

# Loop on xml files in source folder.
# File names are read NUL-delimited from file descriptor 9 into REPLY, so
# stdin stays free for the commands run inside the loop.
while IFS= read -r -d '' -u 9
do
  # declare -i turns empty output into 0, so a file xmlstarlet cannot read
  # presumably ends up in the "Cannot detect file type" branch below.
  declare -i STATIONDATA_COUNT=$(eccc_count_nodes '//stationdata' "${REPLY}")
  declare -i YEAR_COUNT=$(eccc_count_nodes '//stationdata[@year]' "${REPLY}")
  declare -i MONTH_COUNT=$(eccc_count_nodes '//stationdata[@month]' "${REPLY}")
  declare -i DAY_COUNT=$(eccc_count_nodes '//stationdata[@day]' "${REPLY}")
  declare -i HOUR_COUNT=$(eccc_count_nodes '//stationdata[@hour]' "${REPLY}")
  declare -i MINUTE_COUNT=$(eccc_count_nodes '//stationdata[@minute]' "${REPLY}")
  declare -i AL_MONTH_COUNT=$(eccc_count_nodes '//month' "${REPLY}")

  # Inner substitution is quoted so paths containing spaces survive.
  declare LAST_DIR=$(basename "$(dirname "${REPLY}")")
  declare FILENAME=$(basename "${REPLY%.*}")

  # Climate id is taken from the last directory name, else from the file name.
  if ! CLIMATE_ID=$(eccc_climate_id "${LAST_DIR}" "${FILENAME}")
  then
    echo "${REPLY}: Cannot detect climate id" >&2
    # NOTE(review): the statement that followed this message was lost from the
    # visible source; aborting mirrors the other fatal branch - confirm.
    exit 1
  fi

  case "$(eccc_file_kind "${STATIONDATA_COUNT}" "${YEAR_COUNT}" \
    "${MONTH_COUNT}" "${DAY_COUNT}" "${HOUR_COUNT}" "${MINUTE_COUNT}" \
    "${AL_MONTH_COUNT}")" in
    hourly)
      echo "${REPLY}: Hourly data not compatible yet. Ignoring." >&2
      continue
      ;;
    daily)
      echo "${REPLY}: Daily data not compatible yet. Ignoring." >&2
      continue
      ;;
    monthly)
      STYLESHEET_PATH='monthly_to_commons.xslt'
      DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Monthly/${CLIMATE_ID}.tab"
      ;;
    almanac)
      # Check file contains data
      declare -i AL_DAY_COUNT=$(eccc_count_nodes '//day' "${REPLY}")
      if (( AL_DAY_COUNT == 0 ))
      then
        echo "${REPLY}: No day found. Ignoring." >&2
        continue
      fi

      STYLESHEET_PATH='almanac_to_commons.xslt'
      DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Almanac/${CLIMATE_ID}.tab"
      ;;
    *)
      echo "${REPLY}: Cannot detect file type. Exiting." >&2
      exit 1
      ;;
  esac

  echo "Processing ${REPLY} to ${DESTINATION_PATH}…" >&2
  mkdir -p "$(dirname "${DESTINATION_PATH}")"
  # The stylesheet name is relative, so it is looked up from the current
  # working directory. jq gets an explicit identity filter.
  xmlstarlet tr "${STYLESHEET_PATH}" "${REPLY}" | jq . > "${DESTINATION_PATH}"
done 9< <(find "${SOURCE}" -type f -name '*.xml' -print0)