Merge pull request #585 from Forchapeatl/Added-S3-Object-DataNode
Added s3 object data node (AWS)
trgiangdo authored Dec 20, 2023
2 parents aaad0e9 + 4175dca commit b826a99
Showing 15 changed files with 525 additions and 10 deletions.
2 changes: 2 additions & 0 deletions Pipfile
@@ -34,6 +34,7 @@ sqlalchemy = "==2.0.16"
toml = "==0.10"
twisted = "==23.8.0"
tzlocal = "==3.0"
boto3 = "==1.29.1"

[dev-packages]
autopep8 = "*"
@@ -73,6 +74,7 @@ types-python-dateutil = "*"
types-pytz = "*"
types-toml = ">=0.10.0"
types-tzlocal = "*"
moto = ">=4.2.9"

[requires]
python_version = "3"
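The boto3 pin provides the AWS SDK that the new S3 object data node relies on, and moto joins the dev packages so the S3 backend can be tested against an in-memory bucket instead of a real AWS account. A minimal sketch of such a test, assuming moto 4.x's mock_s3 decorator and placeholder bucket and key names (not values from this PR):

import boto3
from moto import mock_s3  # moto >= 4.2.9, added to [dev-packages] above


@mock_s3
def test_round_trip_through_mocked_s3():
    # moto intercepts the boto3 calls, so no real credentials or network access are needed.
    client = boto3.client("s3", region_name="us-east-1")
    client.create_bucket(Bucket="example-bucket")

    client.put_object(Bucket="example-bucket", Key="sample.pkl", Body=b"hello")
    body = client.get_object(Bucket="example-bucket", Key="sample.pkl")["Body"].read()

    assert body == b"hello"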
1 change: 1 addition & 0 deletions taipy/core/config/__init__.py
@@ -54,6 +54,7 @@
("configure_pickle_data_node", DataNodeConfig._configure_pickle),
("configure_excel_data_node", DataNodeConfig._configure_excel),
("configure_generic_data_node", DataNodeConfig._configure_generic),
("configure_s3_object_data_node", DataNodeConfig._configure_s3_object),
],
)
_inject_section(
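This registration is what turns the private DataNodeConfig._configure_s3_object classmethod into the public Config.configure_s3_object_data_node entry point. The wiring is Taipy-internal, but the pattern amounts to attaching each (public_name, method) pair to the Config facade, roughly as in this illustrative sketch (not the actual _inject_section implementation):

def inject_methods(target, name_method_pairs):
    # Attach each (public_name, method) pair to the facade class so callers can
    # write target.public_name(...) instead of reaching into the section class.
    for public_name, method in name_method_pairs:
        setattr(target, public_name, method)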
3 changes: 2 additions & 1 deletion taipy/core/config/checkers/_data_node_config_checker.py
@@ -44,7 +44,8 @@ def _check_storage_type(self, data_node_config_id: str, data_node_config: DataNo
data_node_config._STORAGE_TYPE_KEY,
data_node_config.storage_type,
f"`{data_node_config._STORAGE_TYPE_KEY}` field of DataNodeConfig `{data_node_config_id}` must be"
f" either csv, sql_table, sql, mongo_collection, pickle, excel, generic, json, parquet, or in_memory.",
f" either csv, sql_table, sql, mongo_collection, pickle, excel, generic, json, parquet, s3_object,"
f" or in_memory.",
)

def _check_scope(self, data_node_config_id: str, data_node_config: DataNodeConfig):
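The checker message now has to stay in sync with the storage types accepted by DataNodeConfig. As a rough illustration, assuming the public Config.configure_data_node and Config.check() entry points behave as in current Taipy releases, a config that uses the new storage type should pass this particular check (the credentials below are placeholders):

from taipy import Config

Config.configure_data_node(
    id="my_s3_node",
    storage_type="s3_object",
    aws_access_key="FAKE_KEY_ID",
    aws_secret_access_key="FAKE_SECRET",
    aws_s3_bucket_name="example-bucket",
    aws_s3_object_key="example-object",
)
issues = Config.check()  # "s3_object" no longer triggers the storage-type error above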
44 changes: 43 additions & 1 deletion taipy/core/config/config.schema.json
@@ -99,7 +99,8 @@
"in_memory",
"generic",
"parquet",
""
"s3_object",
"",
],
"default": "pickle"
},
@@ -236,6 +237,30 @@
"description": "storage_type: parquet specific.Additional parameters when writing parquet files, default is an empty dictionary",
"type": "object"
},
"aws_access_key":{
"description": "storage_type: s3_object specific.Amazon Storage public key",
"type": "string"
},
"aws_secret_access_key":{
"description": "storage_type: s3_object specific.Amazon Storage secret key",
"type": "string"
},
"aws_s3_bucket_name":{
"description": "storage_type: s3_object specific.Name of Bucket",
"type": "string"
},
"aws_s3_object_key":{
"description": "storage_type: s3_object specific.File name",
"type": "string"
},
"aws_region":{
"description": "storage_type: s3_object specific.Bucket Location",
"type": "string"
},
"aws_s3_object_parameters":{
"description": "storage_type: s3_object specific.Additional parameters when accessing s3 object, default is an empty dictionary",
"type": "array"
},
"if": {
"properties": {
"storage_type": {
@@ -282,6 +307,23 @@
"required": [
"db_name"
],
"else": {
"if": {
"properties": {
"storage_type": {
"enum": [
"s3_object",
]
}
}
},
"then": {
"required": [
"aws_access_key",
"aws_secret_access_key",
"aws_s3_bucket_name",
"aws_s3_object_key"
],
"if": {
"properties": {
"storage_type": {
82 changes: 78 additions & 4 deletions taipy/core/config/data_node_config.py
@@ -35,8 +35,8 @@ class DataNodeConfig(Section):
Attributes:
id (str): Unique identifier of the data node config. It must be a valid Python variable name.
storage_type (str): Storage type of the data nodes created from the data node config. The possible values
are : "csv", "excel", "pickle", "sql_table", "sql", "mongo_collection", "generic", "json", "parquet" and
"in_memory".
are : "csv", "excel", "pickle", "sql_table", "sql", "mongo_collection", "generic", "json", "parquet",
"in_memory and "s3_object".
The default value is "pickle".
Note that the "in_memory" value can only be used when `JobConfig^`.mode is "standalone".
scope (Optional[Scope^]): The optional `Scope^` of the data nodes instantiated from the data node config.
@@ -57,6 +57,8 @@ class DataNodeConfig(Section):
_STORAGE_TYPE_VALUE_GENERIC = "generic"
_STORAGE_TYPE_VALUE_JSON = "json"
_STORAGE_TYPE_VALUE_PARQUET = "parquet"
_STORAGE_TYPE_VALUE_S3_OBJECT = "s3_object"

_DEFAULT_STORAGE_TYPE = _STORAGE_TYPE_VALUE_PICKLE
_ALL_STORAGE_TYPES = [
_STORAGE_TYPE_VALUE_PICKLE,
Expand All @@ -69,6 +71,7 @@ class DataNodeConfig(Section):
_STORAGE_TYPE_VALUE_GENERIC,
_STORAGE_TYPE_VALUE_JSON,
_STORAGE_TYPE_VALUE_PARQUET,
_STORAGE_TYPE_VALUE_S3_OBJECT,
]

_EXPOSED_TYPE_KEY = "exposed_type"
@@ -145,6 +148,13 @@ class DataNodeConfig(Section):
_OPTIONAL_COMPRESSION_PARQUET_PROPERTY = "compression"
_OPTIONAL_READ_KWARGS_PARQUET_PROPERTY = "read_kwargs"
_OPTIONAL_WRITE_KWARGS_PARQUET_PROPERTY = "write_kwargs"
# S3 object
_REQUIRED_AWS_ACCESS_KEY_ID_PROPERTY = "aws_access_key"
_REQUIRED_AWS_SECRET_ACCESS_KEY_PROPERTY = "aws_secret_access_key"
_REQUIRED_AWS_STORAGE_BUCKET_NAME_PROPERTY = "aws_s3_bucket_name"
_REQUIRED_AWS_S3_OBJECT_KEY_PROPERTY = "aws_s3_object_key"
_OPTIONAL_AWS_REGION_PROPERTY = "aws_region"
_OPTIONAL_AWS_S3_OBJECT_PARAMETERS_PROPERTY = "aws_s3_object_parameters"

_REQUIRED_PROPERTIES: Dict[str, List] = {
_STORAGE_TYPE_VALUE_PICKLE: [],
@@ -169,6 +179,12 @@ class DataNodeConfig(Section):
_STORAGE_TYPE_VALUE_GENERIC: [],
_STORAGE_TYPE_VALUE_JSON: [],
_STORAGE_TYPE_VALUE_PARQUET: [],
_STORAGE_TYPE_VALUE_S3_OBJECT: [
_REQUIRED_AWS_ACCESS_KEY_ID_PROPERTY,
_REQUIRED_AWS_SECRET_ACCESS_KEY_PROPERTY,
_REQUIRED_AWS_STORAGE_BUCKET_NAME_PROPERTY,
_REQUIRED_AWS_S3_OBJECT_KEY_PROPERTY,
],
}

_OPTIONAL_PROPERTIES = {
@@ -241,6 +257,10 @@ class DataNodeConfig(Section):
_OPTIONAL_WRITE_KWARGS_PARQUET_PROPERTY: None,
_OPTIONAL_EXPOSED_TYPE_PARQUET_PROPERTY: _DEFAULT_EXPOSED_TYPE,
},
_STORAGE_TYPE_VALUE_S3_OBJECT: {
_OPTIONAL_AWS_REGION_PROPERTY: None,
_OPTIONAL_AWS_S3_OBJECT_PARAMETERS_PROPERTY: None,
},
}

_SCOPE_KEY = "scope"
@@ -380,8 +400,8 @@ def _set_default_configuration(
Parameters:
storage_type (str): The default storage type for all data node configurations.
The possible values are *"pickle"* (the default value), *"csv"*, *"excel"*,
*"sql"*, *"mongo_collection"*, *"in_memory"*, *"json"*, *"parquet"* or
*"generic"*.
*"sql"*, *"mongo_collection"*, *"in_memory"*, *"json"*, *"parquet"*, *"generic"*,
or *"s3_object"*.
scope (Optional[Scope^]): The default scope for all data node configurations.<br/>
The default value is `Scope.SCENARIO`.
validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
@@ -465,6 +485,7 @@ def _configure(
cls._STORAGE_TYPE_VALUE_GENERIC: cls._configure_generic,
cls._STORAGE_TYPE_VALUE_JSON: cls._configure_json,
cls._STORAGE_TYPE_VALUE_PARQUET: cls._configure_parquet,
cls._STORAGE_TYPE_VALUE_S3_OBJECT: cls._configure_s3_object,
}

if storage_type in cls._ALL_STORAGE_TYPES:
@@ -1030,6 +1051,59 @@ def _configure_mongo_collection(
id, DataNodeConfig._STORAGE_TYPE_VALUE_MONGO_COLLECTION, scope, validity_period, **properties
)

@classmethod
def _configure_s3_object(
cls,
id: str,
aws_access_key: str,
aws_secret_access_key: str,
aws_s3_bucket_name: str,
aws_s3_object_key: str,
aws_region: Optional[str] = None,
aws_s3_object_parameters: Optional[Dict[str, Any]] = None,
scope: Optional[Scope] = None,
validity_period: Optional[timedelta] = None,
**properties,
) -> "DataNodeConfig":
"""Configure a new S3 object data node configuration.
Parameters:
id (str): The unique identifier of the new S3 Object data node configuration.
aws_access_key (str): Amazon Web Services access key ID used to identify the account.
aws_secret_access_key (str): Amazon Web Services secret access key used to authenticate programmatic requests.
aws_s3_bucket_name (str): The S3 bucket to read from and to write the data to.
aws_s3_object_key (str): The key (i.e. the file name) of the object to read from or write to in the bucket.
aws_region (Optional[str]): Self-contained geographic area where Amazon Web Services (AWS)
infrastructure is located.
aws_s3_object_parameters (Optional[dict[str, any]]): A dictionary of additional arguments to be passed
when accessing the S3 object.
scope (Optional[Scope^]): The scope of the S3 Object data node configuration.<br/>
The default value is `Scope.SCENARIO`.
validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
considered up-to-date. Once the validity period has passed, the data node is considered stale and
relevant tasks will run even if they are skippable (see the
[Task configs page](../core/config/task-config.md) for more details).
If *validity_period* is set to None, the data node is always up-to-date.
**properties (dict[str, any]): A keyworded variable length list of additional arguments.
Returns:
The new S3 object data node configuration.
"""
properties.update(
{
cls._REQUIRED_AWS_ACCESS_KEY_ID_PROPERTY: aws_access_key,
cls._REQUIRED_AWS_SECRET_ACCESS_KEY_PROPERTY: aws_secret_access_key,
cls._REQUIRED_AWS_STORAGE_BUCKET_NAME_PROPERTY: aws_s3_bucket_name,
cls._REQUIRED_AWS_S3_OBJECT_KEY_PROPERTY: aws_s3_object_key,
}
)

if aws_region is not None:
properties[cls._OPTIONAL_AWS_REGION_PROPERTY] = aws_region
if aws_s3_object_parameters is not None:
properties[cls._OPTIONAL_AWS_S3_OBJECT_PARAMETERS_PROPERTY] = aws_s3_object_parameters

return cls.__configure(id, DataNodeConfig._STORAGE_TYPE_VALUE_S3_OBJECT, scope, validity_period, **properties)

@staticmethod
def __configure(
id: str,
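Together with the registration in taipy/core/config/__init__.py, this method is exposed as Config.configure_s3_object_data_node. A usage sketch with placeholder credentials, bucket, and key (not values from this PR):

from taipy import Config

s3_node_cfg = Config.configure_s3_object_data_node(
    id="my_s3_object",
    aws_access_key="FAKE_KEY_ID",            # placeholder; never hard-code real credentials
    aws_secret_access_key="FAKE_SECRET",
    aws_s3_bucket_name="example-bucket",
    aws_s3_object_key="reports/output.json",
    aws_region="us-east-1",                  # optional; defaults to None as declared above
)

The four required arguments mirror the _REQUIRED_PROPERTIES entry added above, while aws_region and aws_s3_object_parameters fall back to the None defaults declared in _OPTIONAL_PROPERTIES.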
1 change: 1 addition & 0 deletions taipy/core/data/__init__.py
@@ -21,3 +21,4 @@
from .pickle import PickleDataNode
from .sql import SQLDataNode
from .sql_table import SQLTableDataNode
from .aws_s3 import S3ObjectDataNode
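The re-export makes the new class importable alongside the existing data node types; the class itself is defined in the new taipy/core/data/aws_s3.py module introduced by this PR. For example:

from taipy.core.data import CSVDataNode, S3ObjectDataNode  # both imports resolve after this change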