#!/bin/bash
# Source: Wikimedia Canada Git repositories - eccc_to_commons.git - dllist.sh
#
# dllist - Set of tools to replicate Environment and Climate change Canada data
#
# Copyright (C) 2019-2020 Pierre Choffet
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Location of the station inventory CSV that lists every station together
# with the first/last year of its hourly, daily and monthly data.
STATION_INVENTORY_URL='ftp://client_climate@ftp.tor.ec.gc.ca/Pub/Get_More_Data_Plus_de_donnees/Station Inventory EN.csv'

# Common prefix of every bulk-data XML download URL; the per-request
# parameters (timeframe, stationID, Year, ...) are appended to it.
XML_PREFIX='https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=xml'

# Print one download entry: a `url = "..."` line, an `output = "..."` line
# and a blank separator line.
#
# Arguments:
#   $1 - URL to fetch
#   $2 - path the fetched document is to be stored at
#
# NOTE(review): the statement that emitted these two lines was lost when this
# file was extracted; printing them to stdout is a reconstruction - confirm
# against the upstream script.
function _dllistPrintEntry() {
	printf 'url = "%s"\noutput = "%s"\n\n' "${1}" "${2}"
}

# Print the download entries (url/output pairs) of a single station.
#
# Arguments:
#   $1 - station ID
#   $2 - first year of monthly data (0 or empty: no monthly data)
#   $3 - last year of monthly data  (0 or empty: no monthly data)
#   $4 - first year of daily data   (0 or empty: no daily data)
#   $5 - last year of daily data    (0 or empty: no daily data)
#   $6 - first year of hourly data  (0 or empty: no hourly data)
#   $7 - last year of hourly data   (0 or empty: no hourly data)
# Globals (read):
#   XML_PREFIX    - common prefix of the download URLs
#   OUTPUT_PREFIX - root folder of the output paths
#   NOW_YEAR      - current year
#   NOW_MONTH     - current month (may carry a leading zero)
# Outputs:
#   One entry for the monthly file, one per year for daily files, one per
#   month for hourly files and one for the almanac file, on stdout.
function generateStation() {
	local -r id="${1}"
	# The integer attribute turns an empty value into 0, i.e. "no data".
	local -ir monthly_start_year="${2}"
	local -ir monthly_end_year="${3}"
	local -ir daily_start_year="${4}"
	local -ir daily_end_year="${5}"
	local -ir hourly_start_year="${6}"
	local -ir hourly_end_year="${7}"
	local year month padded_month

	# Generate monthly link
	if [ "${monthly_start_year}" -ne 0 ] && [ "${monthly_end_year}" -ne 0 ]
	then
		_dllistPrintEntry \
			"${XML_PREFIX}&timeframe=3&stationID=${id}" \
			"${OUTPUT_PREFIX}/monthly/${id}.xml"
	fi

	# Generate daily links
	if [ "${daily_start_year}" -ne 0 ] && [ "${daily_end_year}" -ne 0 ]
	then
		for (( year = daily_start_year; year <= daily_end_year; year++ ))
		do
			_dllistPrintEntry \
				"${XML_PREFIX}&timeframe=2&stationID=${id}&Year=${year}&Month=1" \
				"${OUTPUT_PREFIX}/daily/${id}/${year}.xml"
		done
	fi

	# Generate hourly links
	if [ "${hourly_start_year}" -ne 0 ] && [ "${hourly_end_year}" -ne 0 ]
	then
		for (( year = hourly_start_year; year <= hourly_end_year; year++ ))
		do
			for month in {1..12}
			do
				# Do not request months that lie in the future. `[` is kept
				# on purpose: it compares in base 10, so a NOW_MONTH of "08"
				# or "09" is not mistaken for an invalid octal number.
				# NOTE(review): the body of this test was lost in extraction;
				# months ascend, so leaving the loop here is equivalent to
				# skipping each remaining month.
				if [ "${year}" -eq "${NOW_YEAR}" ] && [ "${month}" -gt "${NOW_MONTH}" ]
				then
					break
				fi

				printf -v padded_month '%02d' "${month}"
				_dllistPrintEntry \
					"${XML_PREFIX}&timeframe=1&stationID=${id}&Year=${year}&Month=${month}&Day=1" \
					"${OUTPUT_PREFIX}/hourly/${id}/${year}-${padded_month}.xml"
			done
		done
	fi

	# Generate almanac link
	_dllistPrintEntry \
		"${XML_PREFIX}&timeframe=4&stationID=${id}" \
		"${OUTPUT_PREFIX}/almanac/${id}.xml"
}

# ---------------------------------------------------------------------------
# Main script.
#
# Usage: dllist.sh <download folder>
#
# Downloads the station inventory, then calls generateStation for every
# station found in it. Diagnostics go to stderr so they cannot end up mixed
# with whatever generateStation writes to stdout.
# ---------------------------------------------------------------------------

# NOTE(review): the original guard condition was lost in extraction; only its
# message survived. Reconstructed as "first argument missing or empty".
if [ "$#" -lt 1 ] || [ -z "${1}" ]
then
	echo "No download folder given" >&2
	exit 1
fi

OUTPUT_PREFIX=$(realpath "${1}")

# Get stations inventory
STATION_INVENTORY_PATH="$(mktemp)"
# Remove the temporary inventory on every exit path, including the error
# exits below.
trap 'rm -f -- "${STATION_INVENTORY_PATH}"' EXIT

# The script expects the inventory to have 19 columns. The definition of
# CSV_COLUMNS is not visible in this part of the file, so fall back to that
# documented value when it is unset.
CSV_COLUMNS="${CSV_COLUMNS:-19}"
EXPECTED_COMMAS_COUNT="$((CSV_COLUMNS - 1))"
EXPECTED_QUOTES_COUNT="$((CSV_COLUMNS * 2))"

# Non-zero while the free-text preamble and the column-name line of the
# inventory have not been passed yet.
HEADER=1

NOW_YEAR=$(date +%Y)
NOW_MONTH=$(date +%m)

if ! curl "${STATION_INVENTORY_URL}" > "${STATION_INVENTORY_PATH}"
then
	echo "Unable to download station inventory. Exiting." >&2
	exit 1
fi

while IFS= read -r station
do
	# WARNING: This is a very naive reading of a csv line. Add some sanity checks
	# so we can fail gracefully if escaped '"' or ',' added in source.
	COMMAS=${station//[^,]}
	QUOTES=${station//[^\"]}

	# Either count matching is accepted: fields are extracted by splitting on
	# '"', so a comma inside a quoted field does not shift them.
	if [ "${#COMMAS}" -eq "${EXPECTED_COMMAS_COUNT}" ] || [ "${#QUOTES}" -eq "${EXPECTED_QUOTES_COUNT}" ]
	then
		# Parsing inside CSV content
		if [ "${HEADER}" -ne 0 ]
		then
			# First valid line is columns name, continue
			HEADER=0
			continue
		fi

		# Split once on '"' instead of spawning one awk per field. Array
		# index N-1 holds what `awk -F'"' '{print $N}'` would print.
		IFS='"' read -r -a STATION_FIELDS <<< "${station}"
		STATION_ID="${STATION_FIELDS[7]}"
		STATION_HOURLY_START="${STATION_FIELDS[27]}"
		STATION_HOURLY_END="${STATION_FIELDS[29]}"
		STATION_DAILY_START="${STATION_FIELDS[31]}"
		STATION_DAILY_END="${STATION_FIELDS[33]}"
		STATION_MONTHLY_START="${STATION_FIELDS[35]}"
		STATION_MONTHLY_END="${STATION_FIELDS[37]}"

		generateStation "${STATION_ID}" "${STATION_MONTHLY_START}" "${STATION_MONTHLY_END}" "${STATION_DAILY_START}" "${STATION_DAILY_END}" "${STATION_HOURLY_START}" "${STATION_HOURLY_END}"
	else
		if [ "${HEADER}" -eq 0 ]
		then
			# NOTE: Script requests CSV contains 19 columns. If structure changed, please warn the developers.
			echo "Structure of station inventory file changed. See comment above line ${LINENO} for more explanation. Exiting." >&2
			exit 1
		else
			# Inside header, we can continue
			continue
		fi
	fi
done < "${STATION_INVENTORY_PATH}"

# The temporary inventory file is removed by the EXIT trap installed above.