diff --git a/python/ray/data/datasource/file_based_datasource.py b/python/ray/data/datasource/file_based_datasource.py index 3cef3374d1749..8a3472e419f09 100644 --- a/python/ray/data/datasource/file_based_datasource.py +++ b/python/ray/data/datasource/file_based_datasource.py @@ -89,12 +89,13 @@ class FileShuffleConfig: >>> current_dir = Path(os.getcwd()) >>> # Create temporary Parquet files for testing in the current directory - >>> paths = [current_dir / f"test_file_{i}.parquet" for i in range(5)] - >>> for i, path in enumerate(paths): - >>> # Write dummy Parquet files - >>> data = {'col1': range(10 * i, 10 * (i + 1)), - >>> 'col2': ['foo', 'bar'] * 5} - >>> table = pa.Table.from_pydict(data) - >>> pq.write_table(table, path) + >>> paths = [current_dir / f"test_file_{i}.parquet" for i in range(3)] + >>> # Write dummy Parquet files + >>> table1 = pa.Table.from_pydict({'col1': range(10)}) + >>> table2 = pa.Table.from_pydict({'col1': range(10, 20)}) + >>> table3 = pa.Table.from_pydict({'col1': range(20, 30)}) + >>> pq.write_table(table1, paths[0]) + >>> pq.write_table(table2, paths[1]) + >>> pq.write_table(table3, paths[2]) >>> # Convert paths to strings for read_parquet >>> string_paths = [str(path) for path in paths] >>> # Read with deterministic shuffling