Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SUP-6598: update-public-ephemeral-spark-example #45

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion examples/ephemeral-spark/ec2-key-pair.tf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ resource "tls_private_key" "emr_private_key" {
# Creates a key pair from the public half of tls_private_key.emr_private_key
# using the terraform-aws-modules/key-pair/aws registry module (pinned to 1.0.0).
module "emr_key_pair" {
source = "terraform-aws-modules/key-pair/aws"
version = "1.0.0"
# Diff view: the two key_name lines below are the removed (var.*) and the
# added (local.*) versions of the same argument.
key_name = "${var.name_prefix}-key"
key_name = "${local.name_prefix}-key"
public_key = tls_private_key.emr_private_key.public_key_openssh
}

# Write the generated private key to a PEM file on local disk.
# The file is created with mode 0600 (read/write for the owner only).
resource "local_sensitive_file" "emr_private_key_file" {
  # Output path, named after the deployment prefix.
  filename        = "./${local.name_prefix}-key.pem"
  file_permission = "0600"

  # PEM-encoded private key taken from tls_private_key.emr_private_key.
  content = tls_private_key.emr_private_key.private_key_pem
}
8 changes: 4 additions & 4 deletions examples/ephemeral-spark/emr-buckets.tf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Set up logs bucket with read/write permissions
module "s3-logs" {
source = "git::[email protected]:Datatamer/terraform-aws-s3.git?ref=1.3.2"
bucket_name = "${var.name_prefix}-logs"
source = "git::[email protected]:Datatamer/terraform-aws-s3.git?ref=1.3.3"
bucket_name = "${local.name_prefix}-logs"
read_write_actions = [
"s3:PutObject",
"s3:GetObject",
Expand All @@ -18,8 +18,8 @@ module "s3-logs" {

# Set up root directory bucket
module "s3-data" {
source = "git::[email protected]:Datatamer/terraform-aws-s3.git?ref=1.3.2"
bucket_name = "${var.name_prefix}-data"
source = "git::[email protected]:Datatamer/terraform-aws-s3.git?ref=1.3.3"
bucket_name = "${local.name_prefix}-data"
read_write_actions = [
"s3:GetBucketLocation",
"s3:GetBucketCORS",
Expand Down
42 changes: 42 additions & 0 deletions examples/ephemeral-spark/emr-hbase-config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
[
  {
    "Classification": "emrfs-site",
    "Properties": {
      "fs.s3.consistent": "false",
      "fs.s3.maxConnections": "50000",
      "fs.s3.enableServerSideEncryption": "true",
      "fs.s3a.enableServerSideEncryption": "true",
      "fs.s3.create.allowFileNameEndsWithFolderSuffix": "true"
    }
  },
  {
    "Classification": "hbase-site",
    "Properties": {
      "hbase.rootdir": "s3://${emr_hbase_s3_bucket_root_dir}/hbase-data/",
      "hbase.client.scanner.timeout.period": "600000",
      "hbase.hstore.blockingStoreFiles": "200",
      "hbase.hregion.memstore.block.multiplier": "8",
      "hbase.hregion.memstore.flush.size": "536870912",
      "hbase.rpc.timeout": "600000",
      "hbase.zookeeper.property.tickTime": "3000",
      "zookeeper.session.timeout": "60000"
    }
  },
  {
    "Classification": "hbase",
    "Properties": {
      "hbase.emr.storageMode": "s3"
    }
  },
  {
    "Classification": "hbase-env",
    "Properties": {},
    "Configurations": [
      {
        "Classification": "export",
        "Properties": {
          "HBASE_MASTER_OPTS": "-Xmx26624m",
          "HBASE_REGIONSERVER_OPTS": "-Xmx26624m"
        }
      }
    ]
  }
]
12 changes: 12 additions & 0 deletions examples/ephemeral-spark/emr-spark-config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[
  {
    "Classification": "emrfs-site",
    "Properties": {
      "fs.s3.consistent": "false",
      "fs.s3.maxConnections": "50000",
      "fs.s3.enableServerSideEncryption": "true",
      "fs.s3a.enableServerSideEncryption": "true",
      "fs.s3.create.allowFileNameEndsWithFolderSuffix": "true"
    }
  }
]
41 changes: 32 additions & 9 deletions examples/ephemeral-spark/ephemeral-spark.tf
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Ephemeral Spark cluster
# Security groups for the ephemeral Spark cluster, created by the aws-emr-sgs
# submodule of terraform-aws-emr (pinned to 9.0.0). The service-access security
# groups are supplied by module.aws-emr-sg-service-access.
module "ephemeral-spark-sgs" {
source = "git::git@github.com:Datatamer/terraform-aws-emr.git//modules/aws-emr-sgs?ref=9.0.0"
vpc_id = var.vpc_id
emr_managed_sg_name = format("%s-%s", var.name_prefix, "Ephem-Spark-Internal")
vpc_id = local.vpc_id
emr_managed_sg_name = format("%s-%s", local.name_prefix, "Ephem-Spark-Internal")
emr_service_access_sg_ids = module.aws-emr-sg-service-access.security_group_ids
tags = merge(var.tags, var.emr_tags)
tags = module.tags.tags
}

module "ephemeral-spark-iam" {
Expand All @@ -14,10 +14,33 @@ module "ephemeral-spark-iam" {
module.s3-logs.rw_policy_arn,
module.s3-data.rw_policy_arn
]
vpc_id = var.vpc_id
emr_service_iam_policy_name = "${var.name_prefix}-spark-service-policy"
emr_service_role_name = "${var.name_prefix}-spark-service-role"
emr_ec2_instance_profile_name = "${var.name_prefix}-spark-emr-instance-profile"
emr_ec2_role_name = "${var.name_prefix}-spark-ec2-role"
tags = var.tags
vpc_id = local.vpc_id
emr_service_iam_policy_name = "${local.name_prefix}-spark-service-policy"
emr_service_role_name = "${local.name_prefix}-spark-service-role"
emr_ec2_instance_profile_name = "${local.name_prefix}-spark-emr-instance-profile"
emr_ec2_role_name = "${local.name_prefix}-spark-ec2-role"
tags = module.tags.tags
}

# Instantiates the aws-emr-config submodule (terraform-aws-emr 9.0.0) for the
# ephemeral Spark cluster. No static cluster is created; the EMR configuration
# is read from emr-spark-config.json next to this file, and the data bucket is
# used as the root-directory bucket.
module "ephemeral-spark-config" {
  source = "git::git@github.com:Datatamer/terraform-aws-emr.git//modules/aws-emr-config?ref=9.0.0"

  create_static_cluster          = false
  emr_config_file_path           = "${path.module}/emr-spark-config.json"
  bucket_name_for_root_directory = module.s3-data.bucket_name

  # Bucket keys/prefixes passed to the module for the utility script and the
  # Hadoop/HBase configuration directories.
  utility_script_bucket_key = "ephemeral-spark-util/upload_hbase_config.sh"
  hadoop_config_path        = "ephemeral-spark-config/hadoop/conf/"
  hbase_config_path         = "ephemeral-spark-config/hbase/conf.dist/"
}

# Service-access security group for the ephemeral Spark EMR cluster, built with
# the terraform-aws-security-groups module (pinned to 1.0.1).
# NOTE(review): nothing in the visible configuration consumes this module's
# outputs; module.ephemeral-spark-sgs still receives
# module.aws-emr-sg-service-access.security_group_ids. Confirm which one is
# intended.
module "ephemeral-spark-sg-service-access" {
  source = "git::git@github.com:Datatamer/terraform-aws-security-groups.git?ref=1.0.1"
  vpc_id = local.vpc_id

  # NOTE(review): ports come from the VM ports module, whereas
  # aws-emr-sg-service-access uses
  # module.sg-ports-emr.ingress_service_access_ports. Verify this is deliberate.
  ingress_ports       = module.aws-vm-sg-ports.ingress_ports
  ingress_cidr_blocks = local.ingress_cidr_blocks
  egress_cidr_blocks  = local.egress_cidr_blocks

  sg_name_prefix   = format("%s-%s", local.name_prefix, "spark-emr-service-access")
  egress_protocol  = "all"
  ingress_protocol = "tcp"
  tags             = module.tags.tags
}
74 changes: 39 additions & 35 deletions examples/ephemeral-spark/hbase-cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@ module "emr-hbase" {

# Configurations
create_static_cluster = true
release_label = "emr-6.6.0" # hbase 2.4.4
release_label = "emr-6.8.0"
applications = local.applications
emr_config_file_path = "${path.module}/../emr.json"
bucket_path_to_logs = "logs/${var.name_prefix}-hbase/"
tags = merge(var.tags, var.emr_tags)
abac_valid_tags = var.emr_abac_valid_tags
emr_config_file_path = "./emr-hbase-config.json"
bucket_path_to_logs = "logs/${local.name_prefix}-hbase/"
tags = module.tags.tags

utility_script_bucket_key = "emr-hbase-util/upload_hbase_config.sh"
hadoop_config_path = "emr-hbase-config/hadoop/conf/"
hbase_config_path = "emr-hbase-config/hbase/conf.dist/"

# Networking
subnet_id = var.compute_subnet_id
vpc_id = var.vpc_id
subnet_id = local.compute_subnet_id
vpc_id = local.vpc_id
# Security Group IDs
emr_managed_master_sg_ids = module.aws-emr-sg-master.security_group_ids
emr_managed_core_sg_ids = module.aws-emr-sg-core.security_group_ids
Expand All @@ -32,22 +35,22 @@ module "emr-hbase" {
key_pair_name = module.emr_key_pair.key_pair_key_name

# Names
cluster_name = "${var.name_prefix}-HBase-Cluster"
emr_service_role_name = "${var.name_prefix}-hbase-service-role"
emr_ec2_role_name = "${var.name_prefix}-hbase-ec2-role"
emr_ec2_instance_profile_name = "${var.name_prefix}-hbase-emr-instance-profile"
emr_service_iam_policy_name = "${var.name_prefix}-hbase-service-policy"
master_instance_fleet_name = "${var.name_prefix}-HBaseMasterInstanceGroup"
core_instance_fleet_name = "${var.name_prefix}-HBaseCoreInstanceGroup"
emr_managed_sg_name = "${var.name_prefix}-EMR-Managed"
cluster_name = "${local.name_prefix}-HBase-Cluster"
emr_service_role_name = "${local.name_prefix}-hbase-service-role"
emr_ec2_role_name = "${local.name_prefix}-hbase-ec2-role"
emr_ec2_instance_profile_name = "${local.name_prefix}-hbase-emr-instance-profile"
emr_service_iam_policy_name = "${local.name_prefix}-hbase-service-policy"
master_instance_fleet_name = "${local.name_prefix}-HBaseMasterInstanceGroup"
core_instance_fleet_name = "${local.name_prefix}-HBaseCoreInstanceGroup"
emr_managed_sg_name = "${local.name_prefix}-EMR-Managed"

# Scale
master_instance_on_demand_count = 1
core_instance_on_demand_count = 4
master_instance_type = "m6g.xlarge"
core_instance_type = "r6g.xlarge"
master_ebs_size = 50
core_ebs_size = 200
master_instance_on_demand_count = local.hbase_master_instance_on_demand_count
core_instance_on_demand_count = local.hbase_core_instance_on_demand_count
master_instance_type = local.hbase_master_instance_type
core_instance_type = local.hbase_core_instance_type
master_ebs_size = local.hbase_master_ebs_size
core_ebs_size = local.hbase_core_ebs_size
}

module "sg-ports-emr" {
Expand All @@ -58,37 +61,38 @@ module "sg-ports-emr" {

# Security group for the HBase EMR master nodes (passed to emr-hbase as
# emr_managed_master_sg_ids). Ingress is allowed on the master ports from the
# configured CIDR blocks, the VM security groups and the ephemeral Spark
# managed security group.
module "aws-emr-sg-master" {
source = "git::git@github.com:Datatamer/terraform-aws-security-groups.git?ref=1.0.1"
vpc_id = var.vpc_id
ingress_cidr_blocks = var.ingress_cidr_blocks
vpc_id = local.vpc_id
ingress_cidr_blocks = local.ingress_cidr_blocks
ingress_security_groups = concat(module.aws-sg-vm.security_group_ids, [module.ephemeral-spark-sgs.emr_managed_sg_id])
egress_cidr_blocks = var.egress_cidr_blocks
egress_cidr_blocks = local.egress_cidr_blocks
ingress_ports = module.sg-ports-emr.ingress_master_ports
sg_name_prefix = format("%s-%s", var.name_prefix, "emr-master")
sg_name_prefix = format("%s-%s", local.name_prefix, "emr-master")
egress_protocol = "all"
ingress_protocol = "tcp"
tags = merge(var.tags, var.emr_tags)
tags = module.tags.tags
}

# Security group for the HBase EMR core nodes (passed to emr-hbase as
# emr_managed_core_sg_ids). Ingress is allowed on the core ports from the
# configured CIDR blocks, the VM security groups and the ephemeral Spark
# managed security group.
module "aws-emr-sg-core" {
source = "git::git@github.com:Datatamer/terraform-aws-security-groups.git?ref=1.0.1"
vpc_id = var.vpc_id
ingress_cidr_blocks = var.ingress_cidr_blocks
vpc_id = local.vpc_id
ingress_cidr_blocks = local.ingress_cidr_blocks
ingress_security_groups = concat(module.aws-sg-vm.security_group_ids, [module.ephemeral-spark-sgs.emr_managed_sg_id])
egress_cidr_blocks = var.egress_cidr_blocks
egress_cidr_blocks = local.egress_cidr_blocks
ingress_ports = module.sg-ports-emr.ingress_core_ports
sg_name_prefix = format("%s-%s", var.name_prefix, "emr-core")
sg_name_prefix = format("%s-%s", local.name_prefix, "emr-core")
egress_protocol = "all"
ingress_protocol = "tcp"
tags = merge(var.tags, var.emr_tags)
tags = module.tags.tags
}

# Service-access security group for EMR; its IDs are passed to
# module.ephemeral-spark-sgs as emr_service_access_sg_ids. Ingress uses the
# service-access ports published by module.sg-ports-emr.
module "aws-emr-sg-service-access" {
source = "git::git@github.com:Datatamer/terraform-aws-security-groups.git?ref=1.0.1"
vpc_id = var.vpc_id
ingress_cidr_blocks = var.ingress_cidr_blocks
vpc_id = local.vpc_id
ingress_cidr_blocks = local.ingress_cidr_blocks
egress_cidr_blocks = local.egress_cidr_blocks
ingress_ports = module.sg-ports-emr.ingress_service_access_ports
sg_name_prefix = format("%s-%s", var.name_prefix, "emr-service-access")
sg_name_prefix = format("%s-%s", local.name_prefix, "emr-service-access")
egress_protocol = "all"
ingress_protocol = "tcp"
tags = merge(var.tags, var.emr_tags)
tags = module.tags.tags
}
8 changes: 8 additions & 0 deletions examples/ephemeral-spark/label.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Tag/label module whose `tags` output (module.tags.tags) is applied to the
# resources in this example. Every input is a placeholder to be filled in.
# NOTE(review): this block has no `source` argument. Terraform requires
# `source` on every module block, so `terraform init` will fail until the
# intended module address is added here.
module "tags" {
department = ""
environment = ""
owner = ""
product = ""
customer = ""
name = ""
}
12 changes: 0 additions & 12 deletions examples/ephemeral-spark/local.tfvars

This file was deleted.

3 changes: 3 additions & 0 deletions examples/ephemeral-spark/locals.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
locals {
  # Prefix applied to the names of all resources in this deployment.
  # Enter a value here before planning/applying.
  name_prefix = ""
}
19 changes: 19 additions & 0 deletions examples/ephemeral-spark/network.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
locals {
  # ID of the VPC to deploy into; replace with a valid VPC ID.
  vpc_id = ""

  # Subnet IDs for the EC2 instances; replace with valid subnets.
  ec2-private-a = ""
  ec2-private-b = ""
  ec2-private-c = ""

  # Subnet IDs for the RDS instances; replace with valid subnets.
  rds-private-a = ""
  rds-private-b = ""
  rds-private-c = ""

  # Values derived from the subnets above.
  compute_subnet_id = local.ec2-private-a
  data_subnet_ids   = [local.ec2-private-a, local.ec2-private-b]

  # CIDR blocks for security-group rules; replace with the corresponding ranges.
  ingress_cidr_blocks = [""]
  egress_cidr_blocks  = [""]
}
19 changes: 8 additions & 11 deletions examples/ephemeral-spark/opensearch.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@ module "tamr-opensearch-cluster" {
source = "git::[email protected]:Datatamer/terraform-aws-opensearch?ref=6.0.0"

# Names
domain_name = "${var.name_prefix}-opensearch"
domain_name = "${local.name_prefix}-opensearch"

# In-transit encryption options
node_to_node_encryption_enabled = true
enforce_https = true

# Networking
subnet_ids = [var.data_subnet_ids[0]]
vpc_id = local.vpc_id
subnet_ids = [local.data_subnet_ids[0]]
security_group_ids = module.aws-sg-opensearch.security_group_ids
}

Expand All @@ -18,19 +19,15 @@ module "sg-ports-opensearch" {
source = "git::[email protected]:Datatamer/terraform-aws-es.git//modules/es-ports?ref=5.0.0"
}

data "aws_subnet" "application_subnet" {
id = var.application_subnet_id
}

# Security group for the OpenSearch domain; its IDs are passed to
# tamr-opensearch-cluster as security_group_ids. Ingress is allowed on the
# OpenSearch ports from the configured CIDR blocks, the VM security groups and
# the ephemeral Spark managed security group.
module "aws-sg-opensearch" {
source = "git::git@github.com:Datatamer/terraform-aws-security-groups.git?ref=1.0.1"
vpc_id = var.vpc_id
ingress_cidr_blocks = var.ingress_cidr_blocks
vpc_id = local.vpc_id
ingress_cidr_blocks = local.ingress_cidr_blocks
ingress_security_groups = concat(module.aws-sg-vm.security_group_ids, [module.ephemeral-spark-sgs.emr_managed_sg_id])
egress_cidr_blocks = var.egress_cidr_blocks
egress_cidr_blocks = local.egress_cidr_blocks
ingress_ports = module.sg-ports-opensearch.ingress_ports
sg_name_prefix = format("%s-%s", var.name_prefix, "-os")
# The "%s-%s" format already inserts the separator, so the suffix must not
# begin with "-": the former "-os" rendered as "<prefix>--os".
sg_name_prefix = format("%s-%s", local.name_prefix, "os")
tags = var.tags
tags = module.tags.tags
ingress_protocol = "tcp"
egress_protocol = "all"
}
Expand Down
6 changes: 5 additions & 1 deletion examples/ephemeral-spark/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,16 @@ output "ephemeral-spark-iam" {
value = module.ephemeral-spark-iam
}

# Exposes the whole ephemeral-spark-config module object (all of its outputs).
output "ephemeral-spark-config" {
value = module.ephemeral-spark-config
}

# Exposes the whole ephemeral-spark-sgs module object (all of its outputs).
output "ephemeral-spark-sgs" {
value = module.ephemeral-spark-sgs
}

# Tamr configuration produced by module.tamr-config. Marked sensitive so
# Terraform redacts the value in plan/apply output.
output "tamr-config" {
# Diff view: the two value lines below are the removed (tamr_config_file) and
# the added (rendered) versions of the same argument.
value = module.tamr-config.tamr_config_file
value = module.tamr-config.rendered
sensitive = true
}

Expand Down
Loading