]>
Wikimedia Canada | Git repositories - eccc_to_commons.git/blob - dllist.sh
3 # dllist - Set of tools to replicate Environment and Climate Change Canada data
5 # Copyright (C) 2019-2020 Pierre Choffet
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 STATION_INVENTORY_URL
='ftp://client_climate@ftp.tor.ec.gc.ca/Pub/Get_More_Data_Plus_de_donnees/Station Inventory EN.csv'
21 XML_PREFIX
='https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=xml'
26 function generateStation
() {
29 local -ir monthly_start_year
="${3}"
30 local -ir monthly_end_year
="${4}"
31 local -ir daily_start_year
="${5}"
32 local -ir daily_end_year
="${6}"
33 local -ir hourly_start_year
="${7}"
34 local -ir hourly_end_year
="${8}"
36 # Generate monthly link
37 if [ "${monthly_start_year}" -ne 0 ]&&[ "${monthly_end_year}" -ne 0 ]
40 url = "${XML_PREFIX}&timeframe=3&stationID=${sid}"
41 output = "${OUTPUT_PREFIX}/monthly/${cid}.xml"
46 # Generate daily links
47 if [ "${daily_start_year}" -ne 0 ]&&[ "${daily_end_year}" -ne 0 ]
49 for year
in $(seq ${daily_start_year} ${daily_end_year})
52 url = "${XML_PREFIX}&timeframe=2&stationID=${sid}&Year=${year}&Month=1"
53 output = "${OUTPUT_PREFIX}/daily/${cid}/${year}.xml"
59 # Generate hourly links
60 if [ "${hourly_start_year}" -ne 0 ]&&[ "${hourly_end_year}" -ne 0 ]
62 for year
in $(seq ${hourly_start_year} ${hourly_end_year})
64 for month
in $(seq 1 12)
67 if [ "${year}" -eq "${NOW_YEAR}" ]&&[ "${month}" -gt "${NOW_MONTH}" ]
73 url = "${XML_PREFIX}&timeframe=1&stationID=${sid}&Year=${year}&Month=${month}&Day=1"
74 output = "${OUTPUT_PREFIX}/hourly/${cid}/${year}-$(printf "%02i" ${month}).xml"
81 # Generate almanac link
83 url = "${XML_PREFIX}&timeframe=4&stationID=${sid}"
84 output = "${OUTPUT_PREFIX}/almanac/${cid}.xml"
93 echo "No download folder given"
96 OUTPUT_PREFIX
=$(realpath "${1}")
99 # Get stations inventory
100 STATION_INVENTORY_PATH
="$(mktemp)"
101 EXPECTED_COMMAS_COUNT
="$((${CSV_COLUMNS} - 1))"
102 EXPECTED_QUOTES_COUNT
="$((${CSV_COLUMNS} * 2))"
106 NOW_MONTH
=$(date +%m)
108 curl
"${STATION_INVENTORY_URL}" > "${STATION_INVENTORY_PATH}"
112 while IFS
= read -r station
114 # WARNING: This is a very naive reading of a CSV line. The sanity checks below
115 # let us fail gracefully if an escaped '"' or ',' is ever added to the source data.
116 COMMAS
=${station//[^,]}
117 QUOTES
=${station//[^\"]}
119 if [ ${#COMMAS} -eq ${EXPECTED_COMMAS_COUNT} ]||[ ${#QUOTES} -eq ${EXPECTED_QUOTES_COUNT} ]
121 # Parsing inside CSV content
122 if [ ${HEADER} -ne 0 ]
124 # First valid line holds the column names, continue
129 CLIMATE_ID
="$(echo "${station}" | awk -F'"' '{print $6}')"
130 STATION_ID
="$(echo "${station}" | awk -F'"' '{print $8}')"
131 STATION_HOURLY_START
=$(echo "${station}" | awk -F'"' '{print $28}')
132 STATION_HOURLY_END
=$(echo "${station}" | awk -F'"' '{print $30}')
133 STATION_DAILY_START
=$(echo "${station}" | awk -F'"' '{print $32}')
134 STATION_DAILY_END
=$(echo "${station}" | awk -F'"' '{print $34}')
135 STATION_MONTHLY_START
=$(echo "${station}" | awk -F'"' '{print $36}')
136 STATION_MONTHLY_END
=$(echo "${station}" | awk -F'"' '{print $38}')
137 generateStation
"${CLIMATE_ID}" "${STATION_ID}" "${STATION_MONTHLY_START}" "${STATION_MONTHLY_END}" "${STATION_DAILY_START}" "${STATION_DAILY_END}" "${STATION_HOURLY_START}" "${STATION_HOURLY_END}"
139 if [ "${HEADER}" -eq 0 ]
141 # NOTE: This script expects the CSV to contain 19 columns. If the structure has changed, please warn the developers.
142 echo "Structure of station inventory file changed. See comment above line ${LINENO} for more explanation. Exiting." >&2
145 # Inside header, we can continue
150 done < "${STATION_INVENTORY_PATH}"
153 rm "${STATION_INVENTORY_PATH}"