#!/bin/bash
# dllist - Set of tools to replicate Environment and Climate change Canada data
# on Wikimedia Commons
# Copyright (C) 2019-2020 Pierre Choffet
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
# Usage: <this script> DOWNLOAD_FOLDER
#
# Downloads the station inventory CSV, then prints on stdout one
# `url = "..."` / `output = "..."` pair of lines per file to fetch (monthly,
# daily, hourly and almanac XML for every station). Output paths are rooted
# at the absolute form of DOWNLOAD_FOLDER.
# NOTE(review): the output looks like a `curl --config` file - confirm against
# whatever consumes it.
# Exits 1 when no folder is given or when a CSV record does not have the
# expected structure.

readonly STATION_INVENTORY_URL='ftp://client_climate@ftp.tor.ec.gc.ca/Pub/Get_More_Data_Plus_de_donnees/Station Inventory EN.csv'
readonly XML_PREFIX='https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=xml'
readonly CSV_COLUMNS=19

set -e

#######################################
# Print one download entry (two lines) on stdout.
# Arguments:
#   $1 - URL to fetch
#   $2 - path the fetched document should be written to
#######################################
emitDownloadEntry() {
  printf 'url = "%s"\noutput = "%s"\n' "${1}" "${2}"
}

#######################################
# Print the download entries of a single station.
# Globals (read): XML_PREFIX, OUTPUT_PREFIX, NOW_YEAR, NOW_MONTH
# Arguments:
#   $1 - climate id, used to build output file names
#   $2 - station id, used in the request URLs
#   $3, $4 - first / last year of monthly data
#   $5, $6 - first / last year of daily data
#   $7, $8 - first / last year of hourly data
#   Year arguments are integer-typed: an empty value evaluates to 0, and a
#   timeframe is skipped when either of its two bounds is 0.
# Outputs:
#   One monthly entry, one daily entry per year, one hourly entry per month
#   (months after NOW_MONTH of NOW_YEAR are skipped), and always one almanac
#   entry.
#######################################
function generateStation() {
  local -r cid="${1}"
  local -ir sid="${2}"
  local -ir monthly_start_year="${3}"
  local -ir monthly_end_year="${4}"
  local -ir daily_start_year="${5}"
  local -ir daily_end_year="${6}"
  local -ir hourly_start_year="${7}"
  local -ir hourly_end_year="${8}"
  # Loop variables are local so they do not leak into the caller's scope.
  local -i year month
  local padded_month

  # Generate monthly link
  if (( monthly_start_year != 0 && monthly_end_year != 0 )); then
    emitDownloadEntry \
      "${XML_PREFIX}&timeframe=3&stationID=${sid}" \
      "${OUTPUT_PREFIX}/monthly/${cid}.xml"
  fi

  # Generate daily links (one request per year)
  if (( daily_start_year != 0 && daily_end_year != 0 )); then
    for (( year = daily_start_year; year <= daily_end_year; year++ )); do
      emitDownloadEntry \
        "${XML_PREFIX}&timeframe=2&stationID=${sid}&Year=${year}&Month=1" \
        "${OUTPUT_PREFIX}/daily/${cid}/${year}.xml"
    done
  fi

  # Generate hourly links (one request per month)
  if (( hourly_start_year != 0 && hourly_end_year != 0 )); then
    for (( year = hourly_start_year; year <= hourly_end_year; year++ )); do
      for (( month = 1; month <= 12; month++ )); do
        # Avoid future stats. NOW_MONTH comes from `date +%m` and may carry a
        # leading zero ("08"), hence the explicit base 10.
        if (( year == NOW_YEAR && month > 10#${NOW_MONTH} )); then
          continue
        fi

        printf -v padded_month '%02d' "${month}"
        emitDownloadEntry \
          "${XML_PREFIX}&timeframe=1&stationID=${sid}&Year=${year}&Month=${month}&Day=1" \
          "${OUTPUT_PREFIX}/hourly/${cid}/${year}-${padded_month}.xml"
      done
    done
  fi

  # Generate almanac link
  emitDownloadEntry \
    "${XML_PREFIX}&timeframe=4&stationID=${sid}" \
    "${OUTPUT_PREFIX}/almanac/${cid}.xml"
}

# Check user input. The message goes to stderr because stdout carries the
# generated list.
if [[ -z "${1}" ]]; then
  echo "No download folder given" >&2
  exit 1
fi
OUTPUT_PREFIX=$(realpath "${1}")

# Get stations inventory
STATION_INVENTORY_PATH="$(mktemp)"
# Remove the temporary file on every exit path, including failures under
# `set -e` and the explicit `exit 1` below.
trap 'rm -f -- "${STATION_INVENTORY_PATH}"' EXIT

readonly EXPECTED_COMMAS_COUNT=$(( CSV_COLUMNS - 1 ))
readonly EXPECTED_QUOTES_COUNT=$(( CSV_COLUMNS * 2 ))

# Date
NOW_YEAR=$(date +%Y)
NOW_MONTH=$(date +%m)

curl "${STATION_INVENTORY_URL}" > "${STATION_INVENTORY_PATH}"

# Read stations ids
HEADER=1
# `|| [[ -n ... ]]` keeps a last line that has no trailing newline.
while IFS= read -r station || [[ -n "${station}" ]]; do
  # WARNING: This is a very naive reading of a csv line. Add some sanity checks
  # so we can fail gracefully if escaped '"' or ',' added in source.
  commas=${station//[^,]}
  quotes=${station//[^\"]}

  # Records are split on '"' below, so the quote count must match exactly
  # (an escaped quote would shift every following field). Commas may also
  # appear inside quoted values, so their count is only a lower bound.
  if (( ${#quotes} == EXPECTED_QUOTES_COUNT && ${#commas} >= EXPECTED_COMMAS_COUNT )); then
    # Parsing inside CSV content
    if (( HEADER != 0 )); then
      # First valid line is columns name, continue
      HEADER=0
      continue
    fi

    # Split once on '"': fields[0] is the (empty) text before the first quote,
    # so the n-th quoted column lands in fields[2n - 1].
    IFS='"' read -r -a fields <<< "${station}"
    CLIMATE_ID="${fields[5]}"
    STATION_ID="${fields[7]}"
    STATION_HOURLY_START="${fields[27]}"
    STATION_HOURLY_END="${fields[29]}"
    STATION_DAILY_START="${fields[31]}"
    STATION_DAILY_END="${fields[33]}"
    STATION_MONTHLY_START="${fields[35]}"
    STATION_MONTHLY_END="${fields[37]}"

    generateStation "${CLIMATE_ID}" "${STATION_ID}" \
      "${STATION_MONTHLY_START}" "${STATION_MONTHLY_END}" \
      "${STATION_DAILY_START}" "${STATION_DAILY_END}" \
      "${STATION_HOURLY_START}" "${STATION_HOURLY_END}"
  elif (( HEADER == 0 )); then
    # NOTE: Script requests CSV contains 19 columns. If structure changed, please warn the developers.
    echo "Structure of station inventory file changed. See comment above line ${LINENO} for more explanation. Exiting." >&2
    exit 1
  fi
  # Otherwise we are still in the lines preceding the column names: skip them.
done < "${STATION_INVENTORY_PATH}"

# Clean: the temporary inventory file is removed by the EXIT trap.