diff --git a/c_glib/parquet-glib/arrow-file-writer.cpp b/c_glib/parquet-glib/arrow-file-writer.cpp index 2b8e2bdeac026..738fb4fd824c8 100644 --- a/c_glib/parquet-glib/arrow-file-writer.cpp +++ b/c_glib/parquet-glib/arrow-file-writer.cpp @@ -574,7 +574,6 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, /** * gparquet_arrow_file_writer_new_row_group: * @writer: A #GParquetArrowFileWriter. - * @chunk_size: The max number of rows in a row group. * @error: (nullable): Return location for a #GError or %NULL. * * Start a new row group. @@ -584,13 +583,11 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, * Since: 18.0.0 */ gboolean -gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, - gsize chunk_size, - GError **error) +gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, GError **error) { auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); return garrow::check(error, - parquet_arrow_file_writer->NewRowGroup(chunk_size), + parquet_arrow_file_writer->NewRowGroup(), "[parquet][arrow][file-writer][new-row-group]"); } diff --git a/c_glib/parquet-glib/arrow-file-writer.h b/c_glib/parquet-glib/arrow-file-writer.h index 2c82f7c1f87de..4986430c951d0 100644 --- a/c_glib/parquet-glib/arrow-file-writer.h +++ b/c_glib/parquet-glib/arrow-file-writer.h @@ -135,9 +135,7 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, GPARQUET_AVAILABLE_IN_18_0 gboolean -gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, - gsize chunk_size, - GError **error); +gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, GError **error); GPARQUET_AVAILABLE_IN_18_0 gboolean diff --git a/c_glib/test/parquet/test-arrow-file-writer.rb b/c_glib/test/parquet/test-arrow-file-writer.rb index d8344bf1c50b0..418de4782d0b0 100644 --- a/c_glib/test/parquet/test-arrow-file-writer.rb +++ b/c_glib/test/parquet/test-arrow-file-writer.rb @@ -89,10 +89,10 @@ def test_write_table def test_write_chunked_array schema = build_schema("enabled" => :boolean) writer = Parquet::ArrowFileWriter.new(schema, @file.path) - writer.new_row_group(2) + writer.new_row_group chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true, nil])]) writer.write_chunked_array(chunked_array) - writer.new_row_group(1) + writer.new_row_group chunked_array = Arrow::ChunkedArray.new([build_boolean_array([false])]) writer.write_chunked_array(chunked_array) writer.close diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc index 2c05dcd9be459..536fcdb21c107 100644 --- a/cpp/src/arrow/dataset/file_parquet_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_test.cc @@ -85,7 +85,6 @@ class ParquetFormatHelper { static Status WriteRecordBatch(const RecordBatch& batch, parquet::arrow::FileWriter* writer) { auto schema = batch.schema(); - auto size = batch.num_rows(); if (!schema->Equals(*writer->schema(), false)) { return Status::Invalid("RecordBatch schema does not match this writer's. batch:'", @@ -93,7 +92,7 @@ class ParquetFormatHelper { "'"); } - RETURN_NOT_OK(writer->NewRowGroup(size)); + RETURN_NOT_OK(writer->NewRowGroup()); for (int i = 0; i < batch.num_columns(); i++) { RETURN_NOT_OK(writer->WriteColumnChunk(*batch.column(i))); } diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index 856c032c3588a..cedcebbfb6e33 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -739,7 +739,7 @@ class ParquetIOTestBase : public ::testing::Test { ASSERT_OK_NO_THROW(FileWriter::Make(::arrow::default_memory_pool(), MakeWriter(schema), arrow_schema, default_arrow_writer_properties(), &writer)); - ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length())); + ASSERT_OK_NO_THROW(writer->NewRowGroup()); ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values)); ASSERT_OK_NO_THROW(writer->Close()); // writer->Close() should be idempotent @@ -1053,7 +1053,7 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) { this->MakeWriter(schema), arrow_schema, default_arrow_writer_properties(), &writer)); for (int i = 0; i < 4; i++) { - ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size)); + ASSERT_OK_NO_THROW(writer->NewRowGroup()); std::shared_ptr sliced_array = values->Slice(i * chunk_size, chunk_size); ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array)); } @@ -1126,7 +1126,7 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) { this->MakeWriter(schema), arrow_schema, default_arrow_writer_properties(), &writer)); for (int i = 0; i < 4; i++) { - ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size)); + ASSERT_OK_NO_THROW(writer->NewRowGroup()); std::shared_ptr sliced_array = values->Slice(i * chunk_size, chunk_size); ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array)); } @@ -5149,7 +5149,7 @@ class TestIntegerAnnotateDecimalTypeParquetIO : public TestParquetIO { ::arrow::default_memory_pool(), ParquetFileWriter::Open(this->sink_, schema_node, writer_properties), arrow_schema, default_arrow_writer_properties(), &writer)); - ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length())); + ASSERT_OK_NO_THROW(writer->NewRowGroup()); ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values)); ASSERT_OK_NO_THROW(writer->Close()); } @@ -5481,7 +5481,7 @@ TEST(TestArrowReadWrite, OperationsOnClosedWriter) { // Operations on closed writer are invalid ASSERT_OK(writer->Close()); - ASSERT_RAISES(Invalid, writer->NewRowGroup(1)); + ASSERT_RAISES(Invalid, writer->NewRowGroup()); ASSERT_RAISES(Invalid, writer->WriteColumnChunk(table->column(0), 0, 1)); ASSERT_RAISES(Invalid, writer->NewBufferedRowGroup()); ASSERT_OK_AND_ASSIGN(auto record_batch, table->CombineChunksToBatch()); diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc index 463713df1b1aa..c6d86648c1d63 100644 --- a/cpp/src/parquet/arrow/writer.cc +++ b/cpp/src/parquet/arrow/writer.cc @@ -305,7 +305,7 @@ class FileWriterImpl : public FileWriter { default_arrow_reader_properties(), &schema_manifest_); } - Status NewRowGroup(int64_t chunk_size) override { + Status NewRowGroup() override { RETURN_NOT_OK(CheckClosed()); if (row_group_writer_ != nullptr) { PARQUET_CATCH_NOT_OK(row_group_writer_->Close()); @@ -379,7 +379,7 @@ class FileWriterImpl : public FileWriter { } auto WriteRowGroup = [&](int64_t offset, int64_t size) { - RETURN_NOT_OK(NewRowGroup(size)); + RETURN_NOT_OK(NewRowGroup()); for (int i = 0; i < table.num_columns(); i++) { RETURN_NOT_OK(WriteColumnChunk(table.column(i), offset, size)); } diff --git a/cpp/src/parquet/arrow/writer.h b/cpp/src/parquet/arrow/writer.h index 4e1ddafd9a082..e36b8f252c750 100644 --- a/cpp/src/parquet/arrow/writer.h +++ b/cpp/src/parquet/arrow/writer.h @@ -87,9 +87,14 @@ class PARQUET_EXPORT FileWriter { /// \brief Start a new row group. /// /// Returns an error if not all columns have been written. + virtual ::arrow::Status NewRowGroup() = 0; + + /// \brief Start a new row group. /// - /// \param chunk_size the number of rows in the next row group. - virtual ::arrow::Status NewRowGroup(int64_t chunk_size) = 0; + /// \deprecated Deprecated in 19.0.0. + ARROW_DEPRECATED( + "Deprecated in 19.0.0. Use NewRowGroup() without the `chunk_size` argument.") + virtual ::arrow::Status NewRowGroup(int64_t chunk_size) { return NewRowGroup(); } /// \brief Write ColumnChunk in row group using an array. virtual ::arrow::Status WriteColumnChunk(const ::arrow::Array& data) = 0; diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index d6aebd8284f4a..71e93ce0a4c51 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -556,7 +556,7 @@ cdef extern from "parquet/arrow/writer.h" namespace "parquet::arrow" nogil: const shared_ptr[ArrowWriterProperties]& arrow_properties) CStatus WriteTable(const CTable& table, int64_t chunk_size) - CStatus NewRowGroup(int64_t chunk_size) + CStatus NewRowGroup() CStatus Close() CStatus AddKeyValueMetadata(const shared_ptr[const CKeyValueMetadata]& key_value_metadata)