From 6acd2f8e7eadd15bacf64207ceedbbb481ffc7d7 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Tue, 26 Dec 2023 17:06:01 -0800 Subject: [PATCH] feat: format exceptions in RBR export --- python/pyarrow/src/arrow/python/common.cc | 34 ++++++++++++++++++++--- python/pyarrow/tests/test_cffi.py | 31 +++++++++++++++++++++ 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/src/arrow/python/common.cc b/python/pyarrow/src/arrow/python/common.cc index 6fe2ed4dae321..5ebd2086292f9 100644 --- a/python/pyarrow/src/arrow/python/common.cc +++ b/python/pyarrow/src/arrow/python/common.cc @@ -89,10 +89,36 @@ class PythonErrorDetail : public StatusDetail { const char* type_id() const override { return kErrorDetailTypeId; } std::string ToString() const override { - // This is simple enough not to need the GIL - const auto ty = reinterpret_cast(exc_type_.obj()); - // XXX Should we also print traceback? - return std::string("Python exception: ") + ty->tp_name; + PyAcquireGIL lock; + + // Use traceback.format_exception() + OwnedRef traceback_module; + // TODO: what to do with Status? + internal::ImportModule("traceback", &traceback_module); + + OwnedRef fmt_exception; + internal::ImportFromModule(traceback_module.obj(), "format_exception", + &fmt_exception); + + OwnedRef formatted; + formatted.reset(PyObject_CallFunctionObjArgs(fmt_exception.obj(), exc_type_.obj(), + exc_value_.obj(), exc_traceback_.obj(), + NULL)); + + if (formatted) { + std::string result = "Python exception: "; + Py_ssize_t num_lines = PyList_GET_SIZE(formatted.obj()); + for (Py_ssize_t i = 0; i < num_lines; ++i) { + std::string line_str; + internal::PyObject_StdStringStr(PyList_GET_ITEM(formatted.obj(), i), &line_str); + result += line_str; + } + return result; + } else { + // Fallback to just the exception type + const auto ty = reinterpret_cast(exc_type_.obj()); + return std::string("Python exception: ") + ty->tp_name; + } } void RestorePyError() const { diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py index a9c17cc100cb4..e20d7be7eacfb 100644 --- a/python/pyarrow/tests/test_cffi.py +++ b/python/pyarrow/tests/test_cffi.py @@ -414,6 +414,37 @@ def test_export_import_batch_reader(reader_factory): pa.RecordBatchReader._import_from_c(ptr_stream) +@needs_cffi +def test_export_import_exception_reader(): + c_stream = ffi.new("struct ArrowArrayStream*") + ptr_stream = int(ffi.cast("uintptr_t", c_stream)) + + gc.collect() # Make sure no Arrow data dangles in a ref cycle + old_allocated = pa.total_allocated_bytes() + + def gen(): + if True: + try: + raise ValueError('foo') + except ValueError as e: + raise NotImplementedError('bar') from e + else: + yield from make_batches() + + original = pa.RecordBatchReader.from_batches(make_schema(), gen()) + original._export_to_c(ptr_stream) + + reader = pa.RecordBatchReader._import_from_c(ptr_stream) + with pytest.raises(OSError) as exc_info: + reader.read_next_batch() + + # inner *and* outer exception should be present + assert 'ValueError: foo' in str(exc_info.value) + assert 'NotImplementedError: bar' in str(exc_info.value) + + assert pa.total_allocated_bytes() == old_allocated + + @needs_cffi def test_imported_batch_reader_error(): c_stream = ffi.new("struct ArrowArrayStream*")