diff --git a/chempy/util/parsing.py b/chempy/util/parsing.py index 332fb2e4..7c635165 100644 --- a/chempy/util/parsing.py +++ b/chempy/util/parsing.py @@ -52,17 +52,17 @@ def _get_formula_parser(): BNF for simple chemical formula (no nesting) - integer :: '0'..'9'+ + real :: '0'..'9'+ element :: 'A'..'Z' 'a'..'z'* - term :: element [integer] + term :: element [real] formula :: term+ BNF for nested chemical formula - integer :: '0'..'9'+ + real :: '0'..'9'+ element :: 'A'..'Z' 'a'..'z'* - term :: (element | '(' formula ')') [integer] + term :: (element | '(' formula ')') [real] formula :: term+ Notes @@ -80,14 +80,14 @@ def _get_formula_parser(): _p = __import__(parsing_library) Forward, Group, OneOrMore = _p.Forward, _p.Group, _p.OneOrMore Optional, ParseResults, Regex = _p.Optional, _p.ParseResults, _p.Regex - Suppress, Word, nums = _p.Suppress, _p.Word, _p.nums + Suppress, Word, nums, Combine = _p.Suppress, _p.Word, _p.nums, _p.Combine LPAR, RPAR = map(Suppress, "()") - integer = Word(nums) + real = Combine(Word(nums) + Optional('.' + Word(nums))) - # add parse action to convert integers to ints, to support doing addition + # add parse action to convert reals to floats, to support doing addition # and multiplication at parse time - integer.setParseAction(lambda t: int(t[0])) + real.setParseAction(lambda t: float(t[0])) # element = Word(alphas.upper(), alphas.lower()) # or if you want to be more specific, use this Regex @@ -101,7 +101,7 @@ def _get_formula_parser(): formula = Forward() term = Group((element | Group(LPAR + formula + RPAR)("subgroup")) + - Optional(integer, default=1)("mult")) + Optional(real, default=1)("mult")) # add parse actions for parse-time processing @@ -205,6 +205,7 @@ def _formula_to_parts(formula, prefixes, suffixes): def _parse_stoich(stoich): if stoich == 'e': # special case, the electron is not an element return {} + return {symbols.index(k)+1: n for k, n in _get_formula_parser().parseString(stoich)} @@ -218,19 +219,22 @@ def _parse_stoich(stoich): _latex_mapping = {k + '-': '\\' + k + '-' for k in _greek_letters} _latex_mapping['epsilon-'] = '\\varepsilon-' _latex_mapping['omicron-'] = 'o-' -_latex_mapping['.'] = '^\\bullet ' -_latex_infix_mapping = {'.': '\\cdot '} +_latex_mapping['.'] = '.' +_latex_mapping[':'] = '\\mathpunct{:} ' +_latex_infix_mapping = {':': '\\mathpunct{:} '} _unicode_mapping = {k + '-': v + '-' for k, v in zip(_greek_letters, _greek_u)} _unicode_mapping['.'] = u'⋅' -_unicode_infix_mapping = {'.': u'·'} +_unicode_mapping[':'] = u':' +_unicode_infix_mapping = {':': u':'} _html_mapping = {k + '-': '&' + k + ';-' for k in _greek_letters} _html_mapping['.'] = '⋅' +_html_mapping[':'] = ':' _html_infix_mapping = _html_mapping -def _get_leading_integer(s): +def _get_leading_coeff(s): m = re.findall(r'^\d+', s) if len(m) == 0: m = 1 @@ -254,7 +258,7 @@ def formula_to_composition(formula, prefixes=None, formula: str Chemical formula, e.g. 'H2O', 'Fe+3', 'Cl-' prefixes: iterable strings - Prefixes to ignore, e.g. ('.', 'alpha-') + Prefixes to ignore, e.g. ( 'alpha-') suffixes: tuple of strings Suffixes to ignore, e.g. ('(g)', '(s)') @@ -262,9 +266,9 @@ def formula_to_composition(formula, prefixes=None, -------- >>> formula_to_composition('NH4+') == {0: 1, 1: 4, 7: 1} True - >>> formula_to_composition('.NHO-(aq)') == {0: -1, 1: 1, 7: 1, 8: 1} + >>> formula_to_composition(':NHO-(aq)') == {0: -1, 1: 1, 7: 1, 8: 1} True - >>> formula_to_composition('Na2CO3.7H2O') == {11: 2, 6: 1, 8: 10, 1: 14} + >>> formula_to_composition('Na2CO3:7H2O') == {11: 2, 6: 1, 8: 10, 1: 14} True """ @@ -272,12 +276,12 @@ def formula_to_composition(formula, prefixes=None, prefixes = _latex_mapping.keys() stoich_tok, chg_tok = _formula_to_parts(formula, prefixes, suffixes)[:2] tot_comp = {} - parts = stoich_tok.split('.') + parts = stoich_tok.split(':') for idx, stoich in enumerate(parts): if idx == 0: m = 1 else: - m, stoich = _get_leading_integer(stoich) + m, stoich = _get_leading_coeff(stoich) comp = _parse_stoich(stoich) for k, v in comp.items(): if k not in tot_comp: @@ -321,7 +325,7 @@ def _parse_multiplicity(strings, substance_keys=None): elif len(items) == 2: if items[1] not in result: result[items[1]] = 0 - result[items[1]] += float(items[0]) if '.' in items[0] or 'e' in items[0] else int(items[0]) + result[items[1]] += float(items[0]) if ':' in items[0] or 'e' in items[0] else int(items[0]) else: raise ValueError("To many parts in substring") if substance_keys is not None: @@ -398,14 +402,14 @@ def to_reaction(line, substance_keys, token, Cls, globals_=None, **kwargs): def _formula_to_format(sub, sup, formula, prefixes=None, infixes=None, suffixes=('(s)', '(l)', '(g)', '(aq)')): parts = _formula_to_parts(formula, prefixes.keys(), suffixes) - stoichs = parts[0].split('.') + stoichs = parts[0].split(':') string = '' for idx, stoich in enumerate(stoichs): if idx == 0: m = 1 else: - m, stoich = _get_leading_integer(stoich) - string += _subs('.', infixes) + m, stoich = _get_leading_coeff(stoich) + string += _subs(':', infixes) if m != 1: string += str(m) string += re.sub(r'([0-9]+)', lambda m: sub(m.group(1)), stoich) @@ -445,8 +449,8 @@ def formula_to_latex(formula, prefixes=None, infixes=None, **kwargs): 'Fe(CN)_{6}^{2+}' >>> formula_to_latex('Fe(CN)6+2(aq)') 'Fe(CN)_{6}^{2+}(aq)' - >>> formula_to_latex('.NHO-(aq)') - '^\\bullet NHO^{-}(aq)' + >>> formula_to_latex(':NHO-(aq)') + '\\mathpunct{:} NHO^{-}(aq)' >>> formula_to_latex('alpha-FeOOH(s)') '\\alpha-FeOOH(s)' diff --git a/chempy/util/tests/test_parsing.py b/chempy/util/tests/test_parsing.py index 5b292488..70e592db 100644 --- a/chempy/util/tests/test_parsing.py +++ b/chempy/util/tests/test_parsing.py @@ -37,12 +37,12 @@ def test_formula_to_composition(): assert formula_to_composition('SO4-2(aq)') == {0: -2, 8: 4, 16: 1} # prefixes and suffixes - assert formula_to_composition('.NO2(g)') == {7: 1, 8: 2} - assert formula_to_composition('.NH2') == {1: 2, 7: 1} + assert formula_to_composition(':NO2(g)') == {7: 1, 8: 2} + assert formula_to_composition(':NH2') == {1: 2, 7: 1} assert formula_to_composition('ONOOH') == {1: 1, 7: 1, 8: 3} - assert formula_to_composition('.ONOO') == {7: 1, 8: 3} - assert formula_to_composition('.NO3/2-') == {0: -2, 7: 1, 8: 3} - assert formula_to_composition('.NO3-2') == {0: -2, 7: 1, 8: 3} + assert formula_to_composition(':ONOO') == {7: 1, 8: 3} + assert formula_to_composition(':NO3/2-') == {0: -2, 7: 1, 8: 3} + assert formula_to_composition(':NO3-2') == {0: -2, 7: 1, 8: 3} with pytest.raises(ValueError): formula_to_composition('F-F') @@ -51,7 +51,7 @@ def test_formula_to_composition(): assert formula_to_composition('epsilon-Zn(OH)2(s)') == {1: 2, 8: 2, 30: 1} # crystal water - assert formula_to_composition('Na2CO3.7H2O(s)') == {11: 2, 6: 1, 8: 10, 1: 14} + assert formula_to_composition('Na2CO3:7H2O(s)') == {11: 2, 6: 1, 8: 10, 1: 14} @requires(parsing_library) @@ -123,17 +123,17 @@ def test_formula_to_latex(): assert formula_to_latex('NaCl(s)') == 'NaCl(s)' assert formula_to_latex('e-(aq)') == 'e^{-}(aq)' assert formula_to_latex('Ca+2(aq)') == 'Ca^{2+}(aq)' - assert formula_to_latex('.NO2(g)') == r'^\bullet NO_{2}(g)' - assert formula_to_latex('.NH2') == r'^\bullet NH_{2}' + assert formula_to_latex(':NO2(g)') == r'\mathpunct{:} NO_{2}(g)' + assert formula_to_latex(':NH2') == r'\mathpunct{:} NH_{2}' assert formula_to_latex('ONOOH') == 'ONOOH' - assert formula_to_latex('.ONOO') == r'^\bullet ONOO' - assert formula_to_latex('.NO3/2-') == r'^\bullet NO_{3}^{2-}' - assert formula_to_latex('.NO3-2') == r'^\bullet NO_{3}^{2-}' + assert formula_to_latex(':ONOO') == r'\mathpunct{:} ONOO' + assert formula_to_latex(':NO3/2-') == r'\mathpunct{:} NO_{3}^{2-}' + assert formula_to_latex(':NO3-2') == r'\mathpunct{:} NO_{3}^{2-}' assert formula_to_latex('alpha-FeOOH(s)') == r'\alpha-FeOOH(s)' assert formula_to_latex('epsilon-Zn(OH)2(s)') == ( r'\varepsilon-Zn(OH)_{2}(s)') - assert formula_to_latex('Na2CO3.7H2O(s)') == r'Na_{2}CO_{3}\cdot 7H_{2}O(s)' - assert formula_to_latex('Na2CO3.1H2O(s)') == r'Na_{2}CO_{3}\cdot H_{2}O(s)' + assert formula_to_latex('Na2CO3:7H2O(s)') == r'Na_{2}CO_{3}\mathpunct{:} 7H_{2}O(s)' + assert formula_to_latex('Na2CO3:1H2O(s)') == r'Na_{2}CO_{3}\mathpunct{:} H_{2}O(s)' @requires(parsing_library) @@ -151,16 +151,16 @@ def test_formula_to_unicoce(): assert formula_to_unicode('NaCl(s)') == u'NaCl(s)' assert formula_to_unicode('e-(aq)') == u'e⁻(aq)' assert formula_to_unicode('Ca+2(aq)') == u'Ca²⁺(aq)' - assert formula_to_unicode('.NO2(g)') == u'⋅NO₂(g)' - assert formula_to_unicode('.NH2') == u'⋅NH₂' + assert formula_to_unicode(':NO2(g)') == u':NO₂(g)' + assert formula_to_unicode(':NH2') == u':NH₂' assert formula_to_unicode('ONOOH') == u'ONOOH' - assert formula_to_unicode('.ONOO') == u'⋅ONOO' - assert formula_to_unicode('.NO3/2-') == u'⋅NO₃²⁻' - assert formula_to_unicode('.NO3-2') == u'⋅NO₃²⁻' + assert formula_to_unicode(':ONOO') == u':ONOO' + assert formula_to_unicode(':NO3/2-') == u':NO₃²⁻' + assert formula_to_unicode(':NO3-2') == u':NO₃²⁻' assert formula_to_unicode('alpha-FeOOH(s)') == u'α-FeOOH(s)' assert formula_to_unicode('epsilon-Zn(OH)2(s)') == u'ε-Zn(OH)₂(s)' - assert formula_to_unicode('Na2CO3.7H2O(s)') == u'Na₂CO₃·7H₂O(s)' - assert formula_to_unicode('Na2CO3.1H2O(s)') == u'Na₂CO₃·H₂O(s)' + assert formula_to_unicode('Na2CO3:7H2O(s)') == u'Na₂CO₃:7H₂O(s)' + assert formula_to_unicode('Na2CO3:1H2O(s)') == u'Na₂CO₃:H₂O(s)' @requires(parsing_library) @@ -177,14 +177,14 @@ def test_formula_to_html(): assert formula_to_html('NaCl(s)') == 'NaCl(s)' assert formula_to_html('e-(aq)') == 'e-(aq)' assert formula_to_html('Ca+2(aq)') == 'Ca2+(aq)' - assert formula_to_html('.NO2(g)') == r'⋅NO2(g)' - assert formula_to_html('.NH2') == r'⋅NH2' + assert formula_to_html(':NO2(g)') == r':NO2(g)' + assert formula_to_html(':NH2') == r':NH2' assert formula_to_html('ONOOH') == 'ONOOH' - assert formula_to_html('.ONOO') == r'⋅ONOO' - assert formula_to_html('.NO3/2-') == r'⋅NO32-' - assert formula_to_html('.NO3-2') == r'⋅NO32-' + assert formula_to_html(':ONOO') == r':ONOO' + assert formula_to_html(':NO3/2-') == r':NO32-' + assert formula_to_html(':NO3-2') == r':NO32-' assert formula_to_html('alpha-FeOOH(s)') == r'α-FeOOH(s)' assert formula_to_html('epsilon-Zn(OH)2(s)') == ( r'ε-Zn(OH)2(s)') - assert formula_to_html('Na2CO3.7H2O(s)') == 'Na2CO3⋅7H2O(s)' - assert formula_to_html('Na2CO3.1H2O(s)') == 'Na2CO3⋅H2O(s)' + assert formula_to_html('Na2CO3:7H2O(s)') == 'Na2CO3:7H2O(s)' + assert formula_to_html('Na2CO3:1H2O(s)') == 'Na2CO3:H2O(s)'