Skip to content

Commit

Permalink
Assorted cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
WillAyd committed Jan 4, 2025
1 parent 25087f7 commit b4252c3
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 27 deletions.
26 changes: 6 additions & 20 deletions pandas/core/arrays/list_.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class ListDtype(ArrowDtype):
An ExtensionDtype suitable for storing homogeneous lists of data.
"""

_is_immutable = True # TODO(wayd): should we allow mutability?
_is_immutable = True

def __init__(self, value_dtype: pa.DataType) -> None:
super().__init__(pa.large_list(value_dtype))
Expand All @@ -100,10 +100,7 @@ def name(self) -> str: # type: ignore[override]
"""
A string identifying the data type.
"""
# TODO: reshaping tests require the name to match the large_list
# implementation; presumably there are some astype(str(dtype)) casts
# going on. This should be fixed so the name can just be "list[...]" for end users
return f"large_list[{self.pyarrow_dtype.value_type!s}]"
return f"list[{self.pyarrow_dtype.value_type!s}]"

@property
def kind(self) -> str:
Expand All @@ -124,7 +121,6 @@ def construct_array_type(cls) -> type_t[ListArray]:
return ListArray

def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
# TODO(wayd): should we implement value type support?
for dtype in dtypes:
if (
isinstance(dtype, ListDtype)
Expand Down Expand Up @@ -153,8 +149,7 @@ def __init__(
if isinstance(values, (pa.Array, pa.ChunkedArray)):
parent_type = values.type
if not isinstance(parent_type, (pa.ListType, pa.LargeListType)):
# Ideally could cast here, but I don't think pyarrow implements
# many list casts
# TODO: maybe implement native casts in pyarrow
new_values = [
[x.as_py()] if x.is_valid else None for x in values
]
Expand All @@ -164,12 +159,10 @@ def __init__(
else:
value_type = pa.array(values).type.value_type

# Internally always use large_string instead of string
if value_type == pa.string():
value_type = pa.large_string()

if not isinstance(values, pa.ChunkedArray):
# To support NA, we need to create an Array first :-(
arr = pa.array(values, type=pa.large_list(value_type), from_pandas=True)
self._pa_array = pa.chunked_array(arr, type=pa.large_list(value_type))
else:
Expand Down Expand Up @@ -200,8 +193,6 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
values = pa.array(scalars, from_pandas=True)

if values.type == "null" and dtype is not None:
# TODO: the sequencing here seems wrong; just making the tests pass for now
# but this needs a comprehensive review
pa_type = string_to_pyarrow_type(str(dtype))
values = pa.array(values, type=pa_type)

Expand Down Expand Up @@ -232,8 +223,6 @@ def _box_pa(
return cls._box_pa_array(value, pa_type)

def __getitem__(self, item):
# PyArrow does not support NumPy's selection with an equal length
# mask, so let's convert those to integral positions if needed
if isinstance(item, (np.ndarray, ExtensionArray)):
if is_bool_dtype(item.dtype):
mask_len = len(item)
Expand Down Expand Up @@ -305,9 +294,6 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype):
ExtensionDtype.empty
ExtensionDtype.empty is the 'official' public version of this API.
"""
# Implementer note: while ExtensionDtype.empty is the public way to
# call this method, it is still required to implement this `_empty`
# method as well (it is called internally in pandas)
if isinstance(shape, tuple):
if len(shape) > 1:
raise ValueError("ListArray may only be 1-D")
Expand All @@ -334,9 +320,9 @@ def __eq__(self, other):
elif isinstance(other, (pa.ListScalar, pa.LargeListScalar)):
from pandas.arrays import BooleanArray

# TODO: pyarrow.compute does not implement broadcasting equality
# for an array of lists to a listscalar
# TODO: pyarrow doesn't compare missing values as missing???
# TODO: pyarrow.compute does not implement equal for lists
# https://github.com/apache/arrow/issues/45167
# TODO: pyarrow doesn't compare missing values in Python as missing???
# arr = pa.array([1, 2, None])
# pc.equal(arr, arr[2]) returns all nulls but
# arr[2] == arr[2] returns True
Expand Down
3 changes: 0 additions & 3 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

import numpy as np
from numpy import ma
import pyarrow as pa

from pandas._config import using_string_dtype

Expand Down Expand Up @@ -462,8 +461,6 @@ def treat_as_nested(data, dtype) -> bool:
len(data) > 0
and is_list_like(data[0])
and getattr(data[0], "ndim", 1) == 1
# TODO(wayd): hack so pyarrow list elements don't expand
and not isinstance(data[0], pa.ListScalar)
and not isinstance(dtype, ListDtype)
and not (isinstance(data, ExtensionArray) and data.ndim == 2)
)
Expand Down
5 changes: 1 addition & 4 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1976,10 +1976,7 @@ def from_blocks(

@classmethod
def from_array(
cls,
array: ArrayLike,
index: Index,
refs: BlockValuesRefs | None = None,
cls, array: ArrayLike, index: Index, refs: BlockValuesRefs | None = None
) -> SingleBlockManager:
"""
Constructor for when we have an array that is not yet a Block.
Expand Down

0 comments on commit b4252c3

Please sign in to comment.