From 621ff733dbfde6241117779919ccc51d887f5cbb Mon Sep 17 00:00:00 2001 From: dombean <46692370+dombean@users.noreply.github.com> Date: Fri, 15 Nov 2024 11:25:48 +0000 Subject: [PATCH] add features to insert_df_to_hive_table --- rdsa_utils/cdp/io/output.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rdsa_utils/cdp/io/output.py b/rdsa_utils/cdp/io/output.py index 98e5d35..a7d979f 100644 --- a/rdsa_utils/cdp/io/output.py +++ b/rdsa_utils/cdp/io/output.py @@ -48,13 +48,13 @@ def insert_df_to_hive_table( Parameters ---------- - spark : SparkSession + spark Active SparkSession. - df : SparkDF + df SparkDF containing data to be written. table_name : str Name of the Hive table to write data into. - overwrite : bool, optional + overwrite Controls how existing data is handled, default is False: For non-partitioned data: @@ -64,10 +64,10 @@ def insert_df_to_hive_table( For partitioned data: - True: Replaces data only in partitions present in DataFrame - False: Appends data to existing partitions or creates new ones - fill_missing_cols : bool, optional + fill_missing_cols If True, adds missing columns as nulls. If False, raises error on schema mismatch (default is False). - repartition_column : Union[int, str, None], optional + repartition_column Controls data repartitioning, default is None: - int: Sets target number of partitions - str: Specifies column to repartition by