Skip to content

Commit

Permalink
fix: creates master br if not exists, tune resources
Browse files Browse the repository at this point in the history
  • Loading branch information
leo-the-nardo committed Nov 9, 2024
1 parent 1c327fd commit 4ceb727
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
7 changes: 7 additions & 0 deletions spark-jobs/brazilian-finder/brazilian-finder_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ def spark_job(spark: SparkSession, params, *args, **kwargs):
matching_emails_df = spark.sql("SELECT email_tel FROM matching_emails")
matching_emails_df.write.mode("overwrite").text(s3_output_path)

# Check if the master path exists, create if it doesn't
try:
spark.read.format("delta").load(s3_master_combo_path)
except Exception as e:
# Any failure to load is treated as "table missing" (note: this also masks unrelated errors such as bad credentials), so create an empty Delta table
empty_df = spark.createDataFrame([], schema="email_tel STRING")
empty_df.write.format("delta").save(s3_master_combo_path)
# Append the matching emails to the master delta table
spark.sql(f"""
INSERT INTO delta.`{s3_master_combo_path}`
Expand Down
4 changes: 2 additions & 2 deletions spark-jobs/brazilian-finder/brazilian-finder_spark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ spec:
labels:
version: "3.5.3"
executor:
-cores: 1
+cores: 3
instances: 2
-memory: "1024m"
+memory: "4096m"
envFrom:
- secretRef:
name: s3-onprem-combopurifier
Expand Down

0 comments on commit 4ceb727

Please sign in to comment.