From 00608a092b814c72abe760de58e4c4a563bff7b2 Mon Sep 17 00:00:00 2001 From: Tom Aldcroft Date: Fri, 6 Nov 2020 07:37:58 -0500 Subject: [PATCH] Merge pull request #10995 from taldcroft/ascii-bool-masked Support masked bool data in ASCII files with blank string --- CHANGES.rst | 7 +++++++ astropy/io/ascii/core.py | 15 ++++++++++----- astropy/io/ascii/tests/test_ecsv.py | 23 +++++++++++++++++++++++ astropy/io/ascii/tests/test_read.py | 29 ++++++++++++++++++++++++++++- 4 files changed, 68 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index a59a2b17c0e..576bcc46b3c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -956,6 +956,13 @@ astropy.io.ascii reading, so we now ignore the parallel option and fall back to serial reading. [#10880] +- Fixed a bug where "" (blank string) as input data for a boolean type column + was causing an exception instead of indicating a masked value. As a + consequence of the fix, the values "0" and "1" are now also allowed as valid + inputs for boolean type columns. These new allowed values apply for both ECSV + and for basic character-delimited data files ('basic' format with appropriate + ``converters`` specified). 
[#10995] + astropy.io.fits ^^^^^^^^^^^^^^^ diff --git a/astropy/io/ascii/core.py b/astropy/io/ascii/core.py index d60b260f95a..9f9f520144a 100644 --- a/astropy/io/ascii/core.py +++ b/astropy/io/ascii/core.py @@ -945,13 +945,18 @@ def bool_converter(vals): # Try a smaller subset first for a long array if len(vals) > 10000: svals = numpy.asarray(vals[:1000]) - if not numpy.all((svals == 'False') | (svals == 'True')): - raise ValueError('bool input strings must be only False or True') + if not numpy.all((svals == 'False') + | (svals == 'True') + | (svals == '0') + | (svals == '1')): + raise ValueError('bool input strings must be False, True, 0, 1, or ""') vals = numpy.asarray(vals) - trues = vals == 'True' - falses = vals == 'False' + + trues = (vals == 'True') | (vals == '1') + falses = (vals == 'False') | (vals == '0') if not numpy.all(trues | falses): - raise ValueError('bool input strings must be only False or True') + raise ValueError('bool input strings must be only False, True, 0, 1, or ""') + return trues def generic_converter(vals): diff --git a/astropy/io/ascii/tests/test_ecsv.py b/astropy/io/ascii/tests/test_ecsv.py index 8769c0f7859..e5a8a9c9b6b 100644 --- a/astropy/io/ascii/tests/test_ecsv.py +++ b/astropy/io/ascii/tests/test_ecsv.py @@ -6,6 +6,7 @@ Requires `pyyaml <https://pyyaml.org/>`_ to be installed. 
""" +from astropy.table.column import MaskedColumn import os import copy import sys @@ -581,3 +582,25 @@ def test_round_trip_user_defined_unit(table_cls, tmpdir): t4 = table_cls.read(filename) assert t4['l'].unit is unit assert np.all(t4['l'] == t['l']) + + +@pytest.mark.skipif('not HAS_YAML') +def test_read_masked_bool(): + txt = """\ +# %ECSV 0.9 +# --- +# datatype: +# - {name: col0, datatype: bool} +# schema: astropy-2.0 +col0 +1 +0 +True +"" +False +""" + dat = ascii.read(txt, format='ecsv') + col = dat['col0'] + assert isinstance(col, MaskedColumn) + assert np.all(col.mask == [False, False, False, True, False]) + assert np.all(col == [True, False, True, False, False]) diff --git a/astropy/io/ascii/tests/test_read.py b/astropy/io/ascii/tests/test_read.py index 2903f937b77..4e3d2ffc3a7 100644 --- a/astropy/io/ascii/tests/test_read.py +++ b/astropy/io/ascii/tests/test_read.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Licensed under a 3-clause BSD style license - see LICENSE.rst +from astropy.io.ascii.core import convert_numpy import re from io import BytesIO, open from collections import OrderedDict @@ -13,7 +14,7 @@ import numpy as np from astropy.io import ascii -from astropy.table import Table +from astropy.table import Table, MaskedColumn from astropy import table from astropy.units import Unit from astropy.table.table_helpers import simple_table @@ -1615,3 +1616,29 @@ def test_set_invalid_names(rdb, fast_reader): ascii.read(lines, fast_reader=fast_reader, format=fmt, guess=rdb, names=['b1', 'b2', 'b1', None, None]) assert 'Cannot have None for column name' in str(err.value) + + +def test_read_masked_bool(): + txt = """\ +col0 col1 +1 1 +0 2 +True 3 +"" 4 +False 5 +""" + # Reading without converters returns col0 as a string + dat = ascii.read(txt, format='basic') + col = dat['col0'] + assert isinstance(col, MaskedColumn) + assert col.dtype.kind == 'U' + assert col[0] == "1" + + # Force col0 to be read as bool + converters = {'col0': [convert_numpy(np.bool)]} 
+ dat = ascii.read(txt, format='basic', converters=converters) + col = dat['col0'] + assert isinstance(col, MaskedColumn) + assert col.dtype.kind == 'b' + assert np.all(col.mask == [False, False, False, True, False]) + assert np.all(col == [True, False, True, False, False])