From 1a9734f4f78d80080a9af31ad1b131a9aa0fdd0f Mon Sep 17 00:00:00 2001
From: Vik Paruchuri <vik.paruchuri@gmail.com>
Date: Wed, 29 Jan 2025 09:55:55 -0500
Subject: [PATCH] Add test for partial row splitting

---
 marker/processors/table.py               |  3 ++-
 tests/processors/test_table_processor.py | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/marker/processors/table.py b/marker/processors/table.py
index 1f455848..783a73ce 100644
--- a/marker/processors/table.py
+++ b/marker/processors/table.py
@@ -226,7 +226,8 @@ def split_combined_rows(self, tables: List[TableResult]):
                             new_cell_count += 1
 
                     # For each new row we add, shift up subsequent rows
-                    shift_up += line_lens[0] - 1
+                    # The max is to account for partial rows
+                    shift_up += max(line_lens) - 1
                 else:
                     for cell in row_cells:
                         cell.row_id += shift_up
diff --git a/tests/processors/test_table_processor.py b/tests/processors/test_table_processor.py
index 79224a58..72e2a04b 100644
--- a/tests/processors/test_table_processor.py
+++ b/tests/processors/test_table_processor.py
@@ -1,3 +1,5 @@
+from typing import List
+
 import pytest
 from marker.renderers.json import JSONRenderer
 
@@ -63,3 +65,15 @@ def test_ocr_table(pdf_document, detection_model, recognition_model, table_rec_m
     table_output = renderer(pdf_document)
     assert "1.2E-38" in table_output.markdown
 
+
+@pytest.mark.config({"page_range": [11]})
+def test_split_rows(pdf_document, detection_model, recognition_model, table_rec_model):
+    """Run the table processor and check the last table in the document has 6 distinct row ids."""
+    table_processor = TableProcessor(detection_model, recognition_model, table_rec_model)
+    table_processor(pdf_document)
+
+    last_table = pdf_document.contained_blocks((BlockTypes.Table,))[-1]
+    table_cells: List[TableCell] = last_table.contained_blocks(pdf_document, (BlockTypes.TableCell,))
+    assert len({cell.row_id for cell in table_cells}) == 6
+
+