From 0673c779d48ec15e66ab666ffc573daf56e8e714 Mon Sep 17 00:00:00 2001 From: Pierre Choffet Date: Fri, 24 Jan 2020 15:38:29 -0500 Subject: [PATCH] Add required scripts to generate almanac JSONs --- commons_rules.xsd | 7 ++++++- eccc_to_commons.sh | 5 ++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/commons_rules.xsd b/commons_rules.xsd index 169dd3e..49a50d3 100644 --- a/commons_rules.xsd +++ b/commons_rules.xsd @@ -3,7 +3,12 @@ historical data prior of its import into Wikidata. All tests done in ECCC's bulkschema.xsd aren't replicated here as we're - focusing into pure Wikimedia Commons data quality --> + focusing on pure Wikimedia Commons data quality. + + The schema is restrictive on purpose: up-to-date extreme values are hard-coded, + just like the current year. It WILL break starting in 2021 so we can make sure + someone has checked for potential upstream structure changes before running + a Commons data update. --> diff --git a/eccc_to_commons.sh b/eccc_to_commons.sh index c580fbb..5d7b010 100755 --- a/eccc_to_commons.sh +++ b/eccc_to_commons.sh @@ -92,8 +92,7 @@ do elif [ ${STATIONDATA_COUNT} -eq 0 ]&& \ [ ${AL_MONTH_COUNT} -gt 0 ] then - echo "${REPLY}: Almanac data not compatible yet. Ignoring." >&2 - continue + STYLESHEET_PATH='almanac_to_commons.xslt' DESTINATION_PATH="${DESTINATION}/weather.gc.ca/Almanac/${STATION_ID}.tab" else echo "${REPLY}: Cannot detect file type. Exiting." @@ -102,5 +101,5 @@ do echo "Processing ${REPLY} to ${DESTINATION_PATH}…" >&2 mkdir -p "$(dirname "${DESTINATION_PATH}")" - xmlstarlet tr "${STYLESHEET_PATH}" "${REPLY}" > "${DESTINATION_PATH}" + xmlstarlet tr "${STYLESHEET_PATH}" "${REPLY}" | jq > "${DESTINATION_PATH}" done 9< <( find "${SOURCE}" -type f -name '*.xml' -print0 ) -- 2.42.0