From 2f3682a6a85c816ba37855f0633478869334c529 Mon Sep 17 00:00:00 2001 From: Pierre Choffet Date: Tue, 4 Feb 2020 17:39:39 -0500 Subject: [PATCH] Switch from station id to climate id --- README | 2 +- dllist.sh | 34 ++++++++++++++++++---------------- eccc_to_commons.sh | 24 ++++++++++++------------ 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/README b/README index 9c0fe34..ca371dc 100644 --- a/README +++ b/README @@ -68,7 +68,7 @@ Here are a few examples to inspire you: Keep only monthly data: $ cat downloads_all | grep -B1 -A1 --no-group-separator \ - -E '^output = ".*/monthly/[0-9]*.xml"$' > downloads_monthly + -E '^output = ".*/monthly/[A-Z0-9]{7}.xml"$' > downloads_monthly Remove all downloads before (restart interrupted download): $ sed -n '/https:\/\/climate.weather.gc.ca\/climate_data\/bulk_data_e.html?format=xml&timeframe=3&stationID=2606/,$p' \ diff --git a/dllist.sh b/dllist.sh index c2b9e62..ba3d436 100755 --- a/dllist.sh +++ b/dllist.sh @@ -24,20 +24,21 @@ CSV_COLUMNS=19 set -e function generateStation() { - local -ir id="${1}" - local -ir monthly_start_year="${2}" - local -ir monthly_end_year="${3}" - local -ir daily_start_year="${4}" - local -ir daily_end_year="${5}" - local -ir hourly_start_year="${6}" - local -ir hourly_end_year="${7}" + local -r cid="${1}" + local -ir sid="${2}" + local -ir monthly_start_year="${3}" + local -ir monthly_end_year="${4}" + local -ir daily_start_year="${5}" + local -ir daily_end_year="${6}" + local -ir hourly_start_year="${7}" + local -ir hourly_end_year="${8}" # Generate monthly link if [ "${monthly_start_year}" -ne 0 ]&&[ "${monthly_end_year}" -ne 0 ] then cat <<-EOF >&1 - url = "${XML_PREFIX}&timeframe=3&stationID=${id}" - output = "${OUTPUT_PREFIX}/monthly/${id}.xml" + url = "${XML_PREFIX}&timeframe=3&stationID=${sid}" + output = "${OUTPUT_PREFIX}/monthly/${cid}.xml" EOF fi @@ -48,8 +49,8 @@ function generateStation() { for year in $(seq ${daily_start_year} ${daily_end_year}) do cat <<-EOF >&1 - url = "${XML_PREFIX}&timeframe=2&stationID=${id}&Year=${year}&Month=1" - output = "${OUTPUT_PREFIX}/daily/${id}/${year}.xml" + url = "${XML_PREFIX}&timeframe=2&stationID=${sid}&Year=${year}&Month=1" + output = "${OUTPUT_PREFIX}/daily/${cid}/${year}.xml" EOF done @@ -69,8 +70,8 @@ function generateStation() { fi cat <<-EOF >&1 - url = "${XML_PREFIX}&timeframe=1&stationID=${id}&Year=${year}&Month=${month}&Day=1" - output = "${OUTPUT_PREFIX}/hourly/${id}/${year}-$(printf "%02i" ${month}).xml" + url = "${XML_PREFIX}&timeframe=1&stationID=${sid}&Year=${year}&Month=${month}&Day=1" + output = "${OUTPUT_PREFIX}/hourly/${cid}/${year}-$(printf "%02i" ${month}).xml" EOF done @@ -79,8 +80,8 @@ function generateStation() { # Generate almanac link cat <<-EOF >&1 - url = "${XML_PREFIX}&timeframe=4&stationID=${id}" - output = "${OUTPUT_PREFIX}/almanac/${id}.xml" + url = "${XML_PREFIX}&timeframe=4&stationID=${sid}" + output = "${OUTPUT_PREFIX}/almanac/${cid}.xml" EOF } @@ -125,6 +126,7 @@ do continue fi + CLIMATE_ID="$(echo "${station}" | awk -F'"' '{print $6}')" STATION_ID="$(echo "${station}" | awk -F'"' '{print $8}')" STATION_HOURLY_START=$(echo "${station}" | awk -F'"' '{print $28}') STATION_HOURLY_END=$(echo "${station}" | awk -F'"' '{print $30}') @@ -132,7 +134,7 @@ do STATION_DAILY_END=$(echo "${station}" | awk -F'"' '{print $34}') STATION_MONTHLY_START=$(echo "${station}" | awk -F'"' '{print $36}') STATION_MONTHLY_END=$(echo "${station}" | awk -F'"' '{print $38}') - generateStation "${STATION_ID}" "${STATION_MONTHLY_START}" "${STATION_MONTHLY_END}" "${STATION_DAILY_START}" "${STATION_DAILY_END}" "${STATION_HOURLY_START}" "${STATION_HOURLY_END}" + generateStation "${CLIMATE_ID}" "${STATION_ID}" "${STATION_MONTHLY_START}" "${STATION_MONTHLY_END}" "${STATION_DAILY_START}" "${STATION_DAILY_END}" "${STATION_HOURLY_START}" "${STATION_HOURLY_END}" else if [ "${HEADER}" -eq 0 ] then diff --git a/eccc_to_commons.sh b/eccc_to_commons.sh index 5d7b010..5381bb5 100755 --- a/eccc_to_commons.sh +++ b/eccc_to_commons.sh @@ -42,20 +42,20 @@ do declare -i MINUTE_COUNT=$(xmlstarlet sel -t -v 'count(//stationdata[@minute])' "${REPLY}") declare -i AL_MONTH_COUNT=$(xmlstarlet sel -t -v 'count(//month)' "${REPLY}") - # Detect station id - declare -i LAST_DIR=$(basename $(dirname "${REPLY}")) - declare -i FILENAME=$(basename "${REPLY%.*}") + # Detect climate id + declare LAST_DIR=$(basename $(dirname "${REPLY}")) + declare FILENAME=$(basename "${REPLY%.*}") - if [ "${LAST_DIR}" -gt 0 ] + if [[ "${LAST_DIR}" =~ ^[A-Z0-9]{7}$ ]] then - # Station id is in last directory name - STATION_ID="${LAST_DIR}" - elif [ "${FILENAME}" -gt 0 ] + # Climate id is in last directory name + CLIMATE_ID="${LAST_DIR}" + elif [[ "${FILENAME}" =~ ^[A-Z0-9]{7}$ ]] then - # Station id is in file name - STATION_ID="${FILENAME}" + # Climate id is in file name + CLIMATE_ID="${FILENAME}" else - echo "${REPLY}: Cannot detect station id" + echo "${REPLY}: Cannot detect climate id" exit 1 fi @@ -88,12 +88,12 @@ do [ ${MINUTE_COUNT} -eq 0 ] then STYLESHEET_PATH='monthly_to_commons.xslt' - DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Monthly/${STATION_ID}.tab" + DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Monthly/${CLIMATE_ID}.tab" elif [ ${STATIONDATA_COUNT} -eq 0 ]&& \ [ ${AL_MONTH_COUNT} -gt 0 ] then STYLESHEET_PATH='almanac_to_commons.xslt' - DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Almanac/${STATION_ID}.tab" + DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Almanac/${CLIMATE_ID}.tab" else echo "${REPLY}: Cannot detect file type. Exiting." exit 1 -- 2.42.0