1+ # ruff: noqa: EM102, TRY003
2+
13import logging
24import os
5+ from typing import Any , ClassVar
6+
37
4- GIS_SOURCES = ["gismit" , "gisogm" ]
5- INDEX_ALIASES = {
6- "rdi" : ["jpal" , "whoas" , "zenodo" ],
7- "timdex" : ["alma" , "aspace" , "dspace" ],
8- "geo" : GIS_SOURCES ,
9- }
10- REQUIRED_ENV = {
11- "TIMDEX_ALMA_EXPORT_BUCKET_ID" ,
12- "TIMDEX_S3_EXTRACT_BUCKET_ID" ,
13- "WORKSPACE" ,
14- }
15- REQUIRED_FIELDS = ("next-step" , "run-date" , "run-type" , "source" )
16- REQUIRED_OAI_HARVEST_FIELDS = ("oai-pmh-host" , "oai-metadata-format" )
17- VALID_DATE_FORMATS = ("%Y-%m-%d" , "%Y-%m-%dT%H:%M:%SZ" )
18- VALID_RUN_TYPES = ("full" , "daily" )
19- VALID_STEPS = ("extract" , "transform" , "load" )
20-
21-
22- def check_verbosity (verbose : bool | str ) -> bool : # noqa: FBT001
23- """Determine whether verbose is True or False given a boolean or string value."""
24- if isinstance (verbose , bool ):
25- return verbose
26- return verbose .lower () == "true"
8+ class Config :
9+ REQUIRED_ENV_VARS = (
10+ "TIMDEX_ALMA_EXPORT_BUCKET_ID" ,
11+ "TIMDEX_S3_EXTRACT_BUCKET_ID" ,
12+ "WORKSPACE" ,
13+ )
14+ OPTIONAL_ENV_VARS = ()
15+
16+ GIS_SOURCES = ("gismit" , "gisogm" )
17+ INDEX_ALIASES : ClassVar = {
18+ "rdi" : ["jpal" , "whoas" , "zenodo" ],
19+ "timdex" : ["alma" , "aspace" , "dspace" ],
20+ "geo" : GIS_SOURCES ,
21+ }
22+ REQUIRED_FIELDS = ("next-step" , "run-date" , "run-type" , "source" )
23+ REQUIRED_OAI_HARVEST_FIELDS = ("oai-pmh-host" , "oai-metadata-format" )
24+ VALID_DATE_FORMATS = ("%Y-%m-%d" , "%Y-%m-%dT%H:%M:%SZ" )
25+ VALID_RUN_TYPES = ("full" , "daily" )
26+ VALID_STEPS = ("extract" , "transform" , "load" )
27+
28+ def __getattr__ (self , name : str ) -> Any : # noqa: ANN401
29+ """Provide dot notation access to configurations and env vars on this class."""
30+ if name in self .REQUIRED_ENV_VARS or name in self .OPTIONAL_ENV_VARS :
31+ return os .getenv (name )
32+ message = f"'{ name } ' not a valid configuration variable"
33+ raise AttributeError (message )
34+
35+ def check_required_env_vars (self ) -> None :
36+ """Method to raise exception if required env vars not set."""
37+ missing_vars = [var for var in self .REQUIRED_ENV_VARS if not os .getenv (var )]
38+ if missing_vars :
39+ message = f"Missing required environment variables: { ', ' .join (missing_vars )} "
40+ raise OSError (message )
41+
42+ @staticmethod
43+ def get_verbose_flag (verbose : bool | str ) -> bool : # noqa: FBT001
44+ """Determine whether verbose is True or False given a boolean or string value."""
45+ if isinstance (verbose , bool ):
46+ return verbose
47+ return verbose .lower () == "true"
48+
49+ @property
50+ def alma_export_bucket (self ) -> str :
51+ var = "TIMDEX_ALMA_EXPORT_BUCKET_ID"
52+ value = os .getenv (var )
53+ if not value :
54+ raise OSError (f"Env var '{ var } ' must be defined" )
55+ return value
56+
57+ @property
58+ def timdex_bucket (self ) -> str :
59+ var = "TIMDEX_S3_EXTRACT_BUCKET_ID"
60+ value = os .getenv (var )
61+ if not value :
62+ raise OSError (f"Env var '{ var } ' must be defined" )
63+ return value
64+
65+ @property
66+ def s3_timdex_dataset_data_location (self ) -> str :
67+ """Return full S3 URI (bucket + prefix) of ETL records data location."""
68+ return f"s3://{ self .timdex_bucket } /dataset/data/records"
2769
2870
2971def configure_logger (
@@ -34,9 +76,9 @@ def configure_logger(
3476) -> str :
3577 """Configure application via passed application root logger.
3678
37- If verbose=True, 3rd party libraries can be quite chatty. For convenience, they can
38- be set to WARNING level by either passing a comma seperated list of logger names to
39- 'warning_only_loggers' or by setting the env var WARNING_ONLY_LOGGERS.
79+ If verbose=True, 3rd party libraries can be quite chatty. For convenience, they
80+ can be set to WARNING level by either passing a comma-separated list of logger
81+ names to 'warning_only_loggers' or by setting the env var WARNING_ONLY_LOGGERS.
4082 """
4183 if verbose :
4284 root_logger .setLevel (logging .DEBUG )
@@ -61,55 +103,3 @@ def configure_logger(
61103 f"Logger '{ root_logger .name } ' configured with level="
62104 f"{ logging .getLevelName (root_logger .getEffectiveLevel ())} "
63105 )
64-
65-
66- def validate_input (input_data : dict ) -> None :
67- """Validate input to the lambda function.
68-
69- Ensures that all requiered input fields are present and contain valid data.
70- """
71- # All required fields are present
72- if missing_fields := [field for field in REQUIRED_FIELDS if field not in input_data ]:
73- message = (
74- f"Input must include all required fields. Missing fields: { missing_fields } "
75- )
76- raise ValueError (message )
77-
78- # Valid next step
79- next_step = input_data ["next-step" ]
80- if next_step not in VALID_STEPS :
81- message = (
82- f"Input 'next-step' value must be one of: { VALID_STEPS } . Value "
83- f"provided was '{ next_step } '"
84- )
85- raise ValueError (message )
86-
87- # Valid run type
88- run_type = input_data ["run-type" ]
89- if run_type not in VALID_RUN_TYPES :
90- message = (
91- f"Input 'run-type' value must be one of: { VALID_RUN_TYPES } . Value "
92- f"provided was '{ run_type } '"
93- )
94- raise ValueError (message )
95-
96- # If next step is extract step, required harvest fields are present
97- # ruff: noqa: SIM102
98- if input_data ["next-step" ] == "extract" :
99- if input_data ["source" ] not in GIS_SOURCES :
100- if missing_harvest_fields := [
101- field for field in REQUIRED_OAI_HARVEST_FIELDS if field not in input_data
102- ]:
103- message = (
104- "Input must include all required harvest fields when starting with "
105- f"harvest step. Missing fields: { missing_harvest_fields } "
106- )
107- raise ValueError (message )
108-
109-
110- def verify_env () -> None :
111- """Confirm that required env variables are set."""
112- for key in REQUIRED_ENV :
113- if not os .getenv (key ):
114- message = f"Required env variable { key } is not set"
115- raise RuntimeError (message )
0 commit comments