Wikimedia Canada | Git repositories - eccc_to_commons.git/commitdiff
Switch from station id to climate id
author Pierre Choffet <peuc@wanadoo.fr>
Tue, 4 Feb 2020 22:39:39 +0000 (17:39 -0500)
committer Pierre Choffet <peuc@wanadoo.fr>
Tue, 4 Feb 2020 22:39:39 +0000 (17:39 -0500)
README
dllist.sh
eccc_to_commons.sh

diff --git a/README b/README
index 9c0fe34c404fb0b2335bc3b05c196c36517adc2b..ca371dca55e57823b9fb61ca6608289c4649de8c 100644 (file)
--- a/README
+++ b/README
@@ -68,7 +68,7 @@ Here are a few examples to inspire you:
 
 Keep only monthly data:
   $ cat downloads_all | grep -B1 -A1 --no-group-separator \
-    -E '^output = ".*/monthly/[0-9]*.xml"$' > downloads_monthly
+    -E '^output = ".*/monthly/[A-Z0-9]{7}.xml"$' > downloads_monthly
 
 Remove all downloads before (restart interrupted download):
        $ sed -n '/https:\/\/climate.weather.gc.ca\/climate_data\/bulk_data_e.html?format=xml&timeframe=3&stationID=2606/,$p' \
diff --git a/dllist.sh b/dllist.sh
index c2b9e627532c3e690395550405802f9481f5219d..ba3d436f75c1258a376c83d6b8b735583bd925ca 100755 (executable)
--- a/dllist.sh
+++ b/dllist.sh
@@ -24,20 +24,21 @@ CSV_COLUMNS=19
 set -e
 
 function generateStation() {
-       local -ir id="${1}"
-       local -ir monthly_start_year="${2}"
-       local -ir monthly_end_year="${3}"
-       local -ir daily_start_year="${4}"
-       local -ir daily_end_year="${5}"
-       local -ir hourly_start_year="${6}"
-       local -ir hourly_end_year="${7}"
+       local -r cid="${1}"
+       local -ir sid="${2}"
+       local -ir monthly_start_year="${3}"
+       local -ir monthly_end_year="${4}"
+       local -ir daily_start_year="${5}"
+       local -ir daily_end_year="${6}"
+       local -ir hourly_start_year="${7}"
+       local -ir hourly_end_year="${8}"
 
        # Generate monthly link
        if [ "${monthly_start_year}" -ne 0 ]&&[ "${monthly_end_year}" -ne 0 ]
        then
                cat <<-EOF >&1
-               url = "${XML_PREFIX}&timeframe=3&stationID=${id}"
-               output = "${OUTPUT_PREFIX}/monthly/${id}.xml"
+               url = "${XML_PREFIX}&timeframe=3&stationID=${sid}"
+               output = "${OUTPUT_PREFIX}/monthly/${cid}.xml"
 
                EOF
        fi
@@ -48,8 +49,8 @@ function generateStation() {
                for year in $(seq ${daily_start_year} ${daily_end_year})
                do
                        cat <<-EOF >&1
-                       url = "${XML_PREFIX}&timeframe=2&stationID=${id}&Year=${year}&Month=1"
-                       output = "${OUTPUT_PREFIX}/daily/${id}/${year}.xml"
+                       url = "${XML_PREFIX}&timeframe=2&stationID=${sid}&Year=${year}&Month=1"
+                       output = "${OUTPUT_PREFIX}/daily/${cid}/${year}.xml"
 
                        EOF
                done
@@ -69,8 +70,8 @@ function generateStation() {
                                fi
 
                                cat <<-EOF >&1
-                               url = "${XML_PREFIX}&timeframe=1&stationID=${id}&Year=${year}&Month=${month}&Day=1"
-                               output = "${OUTPUT_PREFIX}/hourly/${id}/${year}-$(printf "%02i" ${month}).xml"
+                               url = "${XML_PREFIX}&timeframe=1&stationID=${sid}&Year=${year}&Month=${month}&Day=1"
+                               output = "${OUTPUT_PREFIX}/hourly/${cid}/${year}-$(printf "%02i" ${month}).xml"
 
                                EOF
                        done
@@ -79,8 +80,8 @@ function generateStation() {
 
        # Generate almanac link
        cat <<-EOF >&1
-       url = "${XML_PREFIX}&timeframe=4&stationID=${id}"
-       output = "${OUTPUT_PREFIX}/almanac/${id}.xml"
+       url = "${XML_PREFIX}&timeframe=4&stationID=${sid}"
+       output = "${OUTPUT_PREFIX}/almanac/${cid}.xml"
 
        EOF
 }
@@ -125,6 +126,7 @@ do
                        continue
                fi
 
+               CLIMATE_ID="$(echo "${station}" | awk -F'"' '{print $6}')"
                STATION_ID="$(echo "${station}" | awk -F'"' '{print $8}')"
                STATION_HOURLY_START=$(echo "${station}" | awk -F'"' '{print $28}')
                STATION_HOURLY_END=$(echo "${station}" | awk -F'"' '{print $30}')
@@ -132,7 +134,7 @@ do
                STATION_DAILY_END=$(echo "${station}" | awk -F'"' '{print $34}')
                STATION_MONTHLY_START=$(echo "${station}" | awk -F'"' '{print $36}')
                STATION_MONTHLY_END=$(echo "${station}" | awk -F'"' '{print $38}')
-               generateStation "${STATION_ID}" "${STATION_MONTHLY_START}" "${STATION_MONTHLY_END}" "${STATION_DAILY_START}" "${STATION_DAILY_END}" "${STATION_HOURLY_START}" "${STATION_HOURLY_END}"
+               generateStation "${CLIMATE_ID}" "${STATION_ID}" "${STATION_MONTHLY_START}" "${STATION_MONTHLY_END}" "${STATION_DAILY_START}" "${STATION_DAILY_END}" "${STATION_HOURLY_START}" "${STATION_HOURLY_END}"
        else
                if [ "${HEADER}" -eq 0 ]
                then
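diff --git a/eccc_to_commons.sh b/eccc_to_commons.sh
index 5d7b01071c177c6f81abaf4dfe7d4c5a85195f31..5381bb5933d8f1bc3903f2ef9ee1134b061f558e 100755 (executable)
--- a/eccc_to_commons.sh
+++ b/eccc_to_commons.sh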
@@ -42,20 +42,20 @@ do
        declare -i MINUTE_COUNT=$(xmlstarlet sel -t -v 'count(//stationdata[@minute])' "${REPLY}")
        declare -i AL_MONTH_COUNT=$(xmlstarlet sel -t -v 'count(//month)' "${REPLY}")
 
-       # Detect station id
-       declare -i LAST_DIR=$(basename $(dirname "${REPLY}"))
-       declare -i FILENAME=$(basename "${REPLY%.*}")
+       # Detect climate id
+       declare LAST_DIR=$(basename $(dirname "${REPLY}"))
+       declare FILENAME=$(basename "${REPLY%.*}")
 
-       if [ "${LAST_DIR}" -gt 0 ]
+       if [[ "${LAST_DIR}" =~ ^[A-Z0-9]{7}$ ]]
        then
-               # Station id is in last directory name
-               STATION_ID="${LAST_DIR}"
-       elif [ "${FILENAME}" -gt 0 ]
+               # Climate id is in last directory name
+               CLIMATE_ID="${LAST_DIR}"
+       elif [[ "${FILENAME}" =~ ^[A-Z0-9]{7}$ ]]
        then
-               # Station id is in file name
-               STATION_ID="${FILENAME}"
+               # Climate id is in file name
+               CLIMATE_ID="${FILENAME}"
        else
-               echo "${REPLY}: Cannot detect station id"
+               echo "${REPLY}: Cannot detect climate id"
                exit 1
        fi
 
@@ -88,12 +88,12 @@ do
             [ ${MINUTE_COUNT} -eq 0 ]
        then
                STYLESHEET_PATH='monthly_to_commons.xslt'
-               DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Monthly/${STATION_ID}.tab"
+               DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Monthly/${CLIMATE_ID}.tab"
        elif [ ${STATIONDATA_COUNT} -eq 0 ]&& \
             [ ${AL_MONTH_COUNT} -gt 0 ]
        then
                STYLESHEET_PATH='almanac_to_commons.xslt'
-               DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Almanac/${STATION_ID}.tab"
+               DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Almanac/${CLIMATE_ID}.tab"
        else
                echo "${REPLY}: Cannot detect file type. Exiting."
                exit 1