]> Wikimedia Canada | Git repositories - eccc_to_commons.git/blob - dllist.sh
Fix indentation
[eccc_to_commons.git] / dllist.sh
1 #!/bin/bash
2
3 # dllist - Set of tools to replicate Environment and Climate Change Canada data
4 # on Wikimedia Commons
5 # Copyright (C) 2019-2020 Pierre Choffet
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
# FTP location of the station inventory CSV. The spaces of the remote file
# name are percent-encoded: raw spaces are not valid inside a URL, and curl
# URL-decodes FTP path components before requesting the file.
readonly STATION_INVENTORY_URL='ftp://client_climate@ftp.tor.ec.gc.ca/Pub/Get_More_Data_Plus_de_donnees/Station%20Inventory%20EN.csv'
# Prefix shared by every bulk data XML download URL.
readonly XML_PREFIX='https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=xml'
# Number of columns expected in each row of the station inventory.
readonly CSV_COLUMNS=19

# Abort as soon as a command fails.
set -e
25
# Print, on stdout, one curl config entry per XML file to download for a
# station. Each entry is a `url = "..."` line, an `output = "..."` line and a
# blank line.
#
# Globals read:
#   XML_PREFIX     prefix shared by all download URLs
#   OUTPUT_PREFIX  folder under which the output paths are built
#   NOW_YEAR       year compared against hourly years to skip future months
#   NOW_MONTH      month compared against hourly months (may be zero-padded)
# Arguments:
#   $1      station id
#   $2, $3  first and last year of monthly data
#   $4, $5  first and last year of daily data
#   $6, $7  first and last year of hourly data
# All arguments are stored as integers, so an empty value counts as 0. A
# timeframe is skipped when either of its bounds is 0. The almanac entry is
# always printed.
function generateStation() {
	local -ir id="${1}"
	local -ir monthly_start_year="${2}"
	local -ir monthly_end_year="${3}"
	local -ir daily_start_year="${4}"
	local -ir daily_end_year="${5}"
	local -ir hourly_start_year="${6}"
	local -ir hourly_end_year="${7}"
	# Loop counters stay local so the caller's variables are never clobbered.
	local -i year month

	# Generate monthly link: one entry per station
	if [ "${monthly_start_year}" -ne 0 ] && [ "${monthly_end_year}" -ne 0 ]
	then
		printf 'url = "%s&timeframe=3&stationID=%s"\noutput = "%s/monthly/%s.xml"\n\n' \
			"${XML_PREFIX}" "${id}" "${OUTPUT_PREFIX}" "${id}"
	fi

	# Generate daily links: one entry per year
	if [ "${daily_start_year}" -ne 0 ] && [ "${daily_end_year}" -ne 0 ]
	then
		for ((year = daily_start_year; year <= daily_end_year; year++))
		do
			printf 'url = "%s&timeframe=2&stationID=%s&Year=%s&Month=1"\noutput = "%s/daily/%s/%s.xml"\n\n' \
				"${XML_PREFIX}" "${id}" "${year}" "${OUTPUT_PREFIX}" "${id}" "${year}"
		done
	fi

	# Generate hourly links: one entry per month
	if [ "${hourly_start_year}" -ne 0 ] && [ "${hourly_end_year}" -ne 0 ]
	then
		for ((year = hourly_start_year; year <= hourly_end_year; year++))
		do
			for ((month = 1; month <= 12; month++))
			do
				# Avoid future stats. `[` is used on purpose: it reads a
				# zero-padded NOW_MONTH such as "08" as decimal, whereas
				# (( )) would reject it as an invalid octal number.
				if [ "${year}" -eq "${NOW_YEAR}" ] && [ "${month}" -gt "${NOW_MONTH}" ]
				then
					continue
				fi

				# %02d zero-pads the month in the output file name.
				printf 'url = "%s&timeframe=1&stationID=%s&Year=%s&Month=%s&Day=1"\noutput = "%s/hourly/%s/%s-%02d.xml"\n\n' \
					"${XML_PREFIX}" "${id}" "${year}" "${month}" \
					"${OUTPUT_PREFIX}" "${id}" "${year}" "${month}"
			done
		done
	fi

	# Generate almanac link
	printf 'url = "%s&timeframe=4&stationID=%s"\noutput = "%s/almanac/%s.xml"\n\n' \
		"${XML_PREFIX}" "${id}" "${OUTPUT_PREFIX}" "${id}"
}
87
88
# Check user input
if [ "${1}" == '' ]
then
	# stdout carries the generated download list, so diagnostics go to stderr.
	echo "No download folder given" >&2
	exit 1
fi
OUTPUT_PREFIX=$(realpath "${1}")

# Get stations inventory. The temporary copy is removed on every exit path,
# including the error exits below and any abort triggered by `set -e`.
STATION_INVENTORY_PATH="$(mktemp)"
trap 'rm -f -- "${STATION_INVENTORY_PATH}"' EXIT
EXPECTED_COMMAS_COUNT="$((CSV_COLUMNS - 1))"
EXPECTED_QUOTES_COUNT="$((CSV_COLUMNS * 2))"

# Date: year and month come from a single `date` call so they always belong
# to the same instant. NOW_MONTH keeps its zero padding (e.g. "08").
NOW="$(date '+%Y %m')"
NOW_YEAR="${NOW% *}"
NOW_MONTH="${NOW#* }"

curl "${STATION_INVENTORY_URL}" > "${STATION_INVENTORY_PATH}"

# Read stations ids
HEADER=1
# The `|| [ -n ... ]` part keeps a last row that lacks a trailing newline.
while IFS= read -r station || [ -n "${station}" ]
do
	# WARNING: This is a very naive reading of a csv line. Add some sanity checks
	# so we can fail gracefully if escaped '"' or ',' added in source.
	COMMAS=${station//[^,]}
	QUOTES=${station//[^\"]}

	# NOTE(review): the two checks are joined with `||`, so a row is accepted
	# when either count matches. The fields below are split on '"', so only the
	# quote count guarantees their positions; confirm whether both checks were
	# meant to be required.
	if [ ${#COMMAS} -eq ${EXPECTED_COMMAS_COUNT} ]||[ ${#QUOTES} -eq ${EXPECTED_QUOTES_COUNT} ]
	then
		# Parsing inside CSV content
		if [ ${HEADER} -ne 0 ]
		then
			# First valid line is columns name, continue
			HEADER=0
			continue
		fi

		# Split the row on '"' once, in the shell, instead of spawning one awk
		# per column. FIELDS is 0-based, so FIELDS[7] is the text between the
		# 7th and 8th quote characters (awk's $8 with -F'"').
		IFS='"' read -r -a FIELDS <<< "${station}"
		STATION_ID="${FIELDS[7]}"
		STATION_HOURLY_START="${FIELDS[27]}"
		STATION_HOURLY_END="${FIELDS[29]}"
		STATION_DAILY_START="${FIELDS[31]}"
		STATION_DAILY_END="${FIELDS[33]}"
		STATION_MONTHLY_START="${FIELDS[35]}"
		STATION_MONTHLY_END="${FIELDS[37]}"
		generateStation "${STATION_ID}" "${STATION_MONTHLY_START}" "${STATION_MONTHLY_END}" "${STATION_DAILY_START}" "${STATION_DAILY_END}" "${STATION_HOURLY_START}" "${STATION_HOURLY_END}"
	elif [ "${HEADER}" -eq 0 ]
	then
		# NOTE: Script requests CSV contains 19 columns. If structure changed, please warn the developers.
		echo "Structure of station inventory file changed. See comment above line ${LINENO} for more explanation. Exiting." >&2
		exit 1
	fi
	# Any other line comes before the column names row and is ignored.
done < "${STATION_INVENTORY_PATH}"

# Clean: the EXIT trap set above removes the temporary inventory file.