Skip to content

Commit

Permalink
add record batch file reader
Browse files Browse the repository at this point in the history
  • Loading branch information
sgilmore10 committed Jun 12, 2024
1 parent 52c7ded commit e6a0ea1
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/io/file.h"
#include "arrow/io/interfaces.h"
#include "arrow/ipc/reader.h"
#include "arrow/matlab/error/error.h"
#include "arrow/matlab/io/ipc/proxy/record_batch_file_reader.h"
#include "arrow/util/utf8.h"


namespace arrow::matlab::io::ipc::proxy {

RecordBatchFileReader::RecordBatchFileReader(std::shared_ptr<arrow::ipc::RecordBatchFileReader> reader) : reader{std::move(reader)} {
REGISTER_METHOD(RecordBatchFileReader, getNumRecordBatches);
}

libmexclass::proxy::MakeResult RecordBatchFileReader::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
namespace mda = ::matlab::data;
using RecordBatchFileReaderProxy = arrow::matlab::io::ipc::proxy::RecordBatchFileReader;

mda::StructArray opts = constructor_arguments[0];
const mda::StringArray filename_mda = opts[0]["Filename"];

const auto filename_utf16 = std::u16string(filename_mda[0]);
MATLAB_ASSIGN_OR_ERROR(const auto filename_utf8,
arrow::util::UTF16StringToUTF8(filename_utf16),
error::UNICODE_CONVERSION_ERROR_ID);

MATLAB_ASSIGN_OR_ERROR(auto input_stream,
arrow::io::ReadableFile::Open(filename_utf8),
error::FAILED_TO_OPEN_FILE_FOR_WRITE);

MATLAB_ASSIGN_OR_ERROR(auto reader,
arrow::ipc::RecordBatchFileReader::Open(input_stream),
"arrow:matlab:MakeFailed");

return std::make_shared<RecordBatchFileReaderProxy>(std::move(reader));
}

void RecordBatchFileReader::getNumRecordBatches(libmexclass::proxy::method::Context& context) {
namespace mda = ::matlab::data;
mda::ArrayFactory factory;
const auto num_batches = reader->num_record_batches();
context.outputs[0] = factory.createScalar(num_batches);
}



} // namespace arrow::matlab::io::ipc::proxy
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "arrow/ipc/reader.h"

#include "libmexclass/proxy/Proxy.h"

namespace arrow::matlab::io::ipc::proxy {

class RecordBatchFileReader : public libmexclass::proxy::Proxy {
public:
RecordBatchFileReader(std::shared_ptr<arrow::ipc::RecordBatchFileReader>);

static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments);

protected:

void getNumRecordBatches(libmexclass::proxy::method::Context& context);

std::shared_ptr<arrow::ipc::RecordBatchFileReader> reader;

};

} // namespace arrow::matlab::io::ipc::proxy
3 changes: 2 additions & 1 deletion matlab/src/cpp/arrow/matlab/proxy/factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "arrow/matlab/error/error.h"
#include "arrow/matlab/io/csv/proxy/table_reader.h"
#include "arrow/matlab/io/csv/proxy/table_writer.h"
#include "arrow/matlab/io/ipc/proxy/record_batch_file_reader.h"
#include "arrow/matlab/io/ipc/proxy/record_batch_file_writer.h"
#include "arrow/matlab/io/feather/proxy/reader.h"
#include "arrow/matlab/io/feather/proxy/writer.h"
Expand Down Expand Up @@ -109,8 +110,8 @@ libmexclass::proxy::MakeResult Factory::make_proxy(
REGISTER_PROXY(arrow.c.proxy.Schema , arrow::matlab::c::proxy::Schema);
REGISTER_PROXY(arrow.c.proxy.RecordBatchImporter , arrow::matlab::c::proxy::RecordBatchImporter);
REGISTER_PROXY(arrow.io.ipc.proxy.RecordBatchFileWriter , arrow::matlab::io::ipc::proxy::RecordBatchFileWriter);
REGISTER_PROXY(arrow.io.ipc.proxy.RecordBatchFileReader , arrow::matlab::io::ipc::proxy::RecordBatchFileReader);
// clang-format on

return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID,
"Did not find matching C++ proxy for " + class_name};
};
Expand Down
3 changes: 2 additions & 1 deletion matlab/tools/cmake/BuildMatlabArrowInterface.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array_importer.cc"
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/schema.cc"
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/record_batch_importer.cc"
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_writer.cc")
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_writer.cc"
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_reader.cc")


set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")
Expand Down

0 comments on commit e6a0ea1

Please sign in to comment.