Skip to content

Commit

Permalink
Merge pull request astropy#10995 from taldcroft/ascii-bool-masked
Browse files Browse the repository at this point in the history
Support masked bool data in ASCII files with blank string
  • Loading branch information
taldcroft authored and bsipocz committed Nov 6, 2020
1 parent 8fc5ce6 commit 00608a0
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 6 deletions.
7 changes: 7 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -956,6 +956,13 @@ astropy.io.ascii
reading, so we now ignore the parallel option and fall back to serial reading.
[#10880]

- Fixed a bug where "" (blank string) as input data for a boolean type column
caused an exception instead of indicating a masked value. As a
consequence of the fix, the values "0" and "1" are now also allowed as valid
inputs for boolean type columns. These newly allowed values apply to both ECSV
and basic character-delimited data files ('basic' format with appropriate
``converters`` specified). [#10995]

astropy.io.fits
^^^^^^^^^^^^^^^

Expand Down
15 changes: 10 additions & 5 deletions astropy/io/ascii/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -945,13 +945,18 @@ def bool_converter(vals):
# Try a smaller subset first for a long array
if len(vals) > 10000:
svals = numpy.asarray(vals[:1000])
if not numpy.all((svals == 'False') | (svals == 'True')):
raise ValueError('bool input strings must be only False or True')
if not numpy.all((svals == 'False')
| (svals == 'True')
| (svals == '0')
| (svals == '1')):
raise ValueError('bool input strings must be False, True, 0, 1, or ""')
vals = numpy.asarray(vals)
trues = vals == 'True'
falses = vals == 'False'

trues = (vals == 'True') | (vals == '1')
falses = (vals == 'False') | (vals == '0')
if not numpy.all(trues | falses):
raise ValueError('bool input strings must be only False or True')
raise ValueError('bool input strings must be only False, True, 0, 1, or ""')

return trues

def generic_converter(vals):
Expand Down
23 changes: 23 additions & 0 deletions astropy/io/ascii/tests/test_ecsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Requires `pyyaml <https://pyyaml.org/>`_ to be installed.
"""
from astropy.table.column import MaskedColumn
import os
import copy
import sys
Expand Down Expand Up @@ -581,3 +582,25 @@ def test_round_trip_user_defined_unit(table_cls, tmpdir):
t4 = table_cls.read(filename)
assert t4['l'].unit is unit
assert np.all(t4['l'] == t['l'])


@pytest.mark.skipif('not HAS_YAML')
def test_read_masked_bool():
    """Read an ECSV bool column that contains a blank-string entry.

    The column is declared with ``datatype: bool`` and mixes the spellings
    1, 0, True and False with one blank-string row.  The test checks that
    the result is a ``MaskedColumn`` in which only the blank row is masked
    and the remaining rows compare equal to the expected booleans.
    """
    ecsv_text = """\
# %ECSV 0.9
# ---
# datatype:
# - {name: col0, datatype: bool}
# schema: astropy-2.0
col0
1
0
True
""
False
"""
    # Only the fourth row (the blank string) is expected to be masked.
    expected_mask = [False, False, False, True, False]
    expected_values = [True, False, True, False, False]

    table = ascii.read(ecsv_text, format='ecsv')
    bool_col = table['col0']

    assert isinstance(bool_col, MaskedColumn)
    assert np.all(bool_col.mask == expected_mask)
    assert np.all(bool_col == expected_values)
29 changes: 28 additions & 1 deletion astropy/io/ascii/tests/test_read.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Licensed under a 3-clause BSD style license - see LICENSE.rst

from astropy.io.ascii.core import convert_numpy
import re
from io import BytesIO, open
from collections import OrderedDict
Expand All @@ -13,7 +14,7 @@
import numpy as np

from astropy.io import ascii
from astropy.table import Table
from astropy.table import Table, MaskedColumn
from astropy import table
from astropy.units import Unit
from astropy.table.table_helpers import simple_table
Expand Down Expand Up @@ -1615,3 +1616,29 @@ def test_set_invalid_names(rdb, fast_reader):
ascii.read(lines, fast_reader=fast_reader, format=fmt, guess=rdb,
names=['b1', 'b2', 'b1', None, None])
assert 'Cannot have None for column name' in str(err.value)


def test_read_masked_bool():
    """Read a 'basic' format table whose first column has a blank-string entry.

    Two reads of the same text are checked:

    1. Without converters, ``col0`` comes back as a masked string column
       (dtype kind 'U') whose first element is the string "1".
    2. With a bool converter forced on ``col0``, the column comes back as a
       masked bool column (dtype kind 'b') in which only the blank-string
       row is masked.
    """
    txt = """\
col0 col1
1 1
0 2
True 3
"" 4
False 5
"""
    # Reading without converters returns col0 as a string
    dat = ascii.read(txt, format='basic')
    col = dat['col0']
    assert isinstance(col, MaskedColumn)
    assert col.dtype.kind == 'U'
    assert col[0] == "1"

    # Force col0 to be read as bool.  The builtin ``bool`` is used instead of
    # ``np.bool``: that alias was deprecated in NumPy 1.20 and removed in
    # NumPy 1.24, so referencing it fails on those releases.
    converters = {'col0': [convert_numpy(bool)]}
    dat = ascii.read(txt, format='basic', converters=converters)
    col = dat['col0']
    assert isinstance(col, MaskedColumn)
    assert col.dtype.kind == 'b'
    # Only the fourth row (the blank string) is masked.
    assert np.all(col.mask == [False, False, False, True, False])
    assert np.all(col == [True, False, True, False, False])

0 comments on commit 00608a0

Please sign in to comment.