From 2582bdfd8f780e27b1a262c8a4d51f4e36fedf0f Mon Sep 17 00:00:00 2001 From: Ko van der Sloot Date: Thu, 5 Dec 2024 08:33:45 +0100 Subject: [PATCH] Squashed commit of the following: in C++ all enum types are 'enum class' now. added AbstractWord as common parent of Word and Hiddenword --- foliatools/foliaspec.py | 131 ++++++++++++++++++++++++++-------------- 1 file changed, 87 insertions(+), 44 deletions(-) diff --git a/foliatools/foliaspec.py b/foliatools/foliaspec.py index 6c90b06..7a8db85 100644 --- a/foliatools/foliaspec.py +++ b/foliatools/foliaspec.py @@ -144,7 +144,7 @@ def outputvar(var, value, target, declare = False): if value is None: if declare: raise NotImplementedError("Declare not supported for None values") if varname in ('REQUIRED_ATTRIBS','OPTIONAL_ATTRIBS'): - return var + ' = NO_ATT;' + return var + ' = Attrib::NO_ATT;' elif varname == 'ANNOTATIONTYPE': return var + ' = AnnotationType::NO_ANN;' elif varname in ('XMLTAG','TEXTDELIMITER'): @@ -176,10 +176,10 @@ def outputvar(var, value, target, declare = False): else: typedeclaration = '' operator = '+=' - value = [ x + '_t' for x in value ] + value = [ 'ElementType::' + x + '_t' for x in value ] return typedeclaration + var + ' ' + operator + ' {' + ', '.join(value) + '};' elif all([ x in spec['attributes'] for x in value ]): - return var + ' = ' + '|'.join(value) + ';' + return var + ' = Attrib::' + '|Attrib::'.join(value) + ';' else: return typedeclaration + var + ' = { ' + ', '.join([ '"' + x + '"' for x in value if x]) + ', };' else: @@ -306,6 +306,12 @@ def setelementproperties_cpp(element,indent, defer,done): # element. # for C++ we split them s += indent + to_upper_props(cls) + ' = ABSTRACT_HIGHER_ORDER_ANNOTATION_PROPERTIES;\n' + if cls == 'AbstractWord': + # this is from a pseudo AbstractWord element created special for C++ + # the specification has Word both as an abstract base AND as an + # element. + # for C++ we split them + s += indent + to_upper_props(cls) + ' = ABSTRACT_STRUCTURE_ELEMENT_PROPERTIES;\n' if cls in parents: for parent in parents[cls]: if parent not in done: @@ -313,14 +319,20 @@ def setelementproperties_cpp(element,indent, defer,done): return None else: if parent == 'Feature' or cls == 'Feature': - # another trci needed to split Feature + # another trick needed to split Feature parent = 'AbstractFeature' + if cls == 'Word': + # another trick needed to split Word + parent = 'AbstractWord' + if cls == 'Hiddenword': + # another trick needed to split Word + parent = 'AbstractWord' if 'Abstract' not in cls: if 'Abstract' not in parent: s += indent + cls + '::PROPS = ' + parent + '::PROPS;\n' else: s += indent + cls + '::PROPS = ' + to_upper_props(parent) + ';\n' - s += indent + 'abstract_parents[' + cls + '_t] = ' + parent + '_t;\n' + s += indent + 'abstract_parents[ElementType::' + cls + '_t] = ElementType::' + parent + '_t;\n' else: if 'Abstract' in parent: @@ -328,9 +340,9 @@ def setelementproperties_cpp(element,indent, defer,done): break if 'Abstract' not in cls: - s += indent + cls + '::PROPS.ELEMENT_ID = ' + cls + '_t;\n' + s += indent + cls + '::PROPS.ELEMENT_ID = ElementType::' + cls + '_t;\n' else: - s += indent + to_upper_props(cls) + '.ELEMENT_ID = ' + cls + '_t;\n' + s += indent + to_upper_props(cls) + '.ELEMENT_ID = ElementType::' + cls + '_t;\n' if 'properties' in element: for prop, value in sorted(element['properties'].items()): if target not in skip_properties or prop not in skip_properties[target]: @@ -343,22 +355,33 @@ def setelementproperties_cpp(element,indent, defer,done): value = 'feat' prop = 'subset' elif prop == 'accepted_data': - value = tuple(sorted(addfromparents(element['class'],'accepted_data'))) - if 'Feature' in value: + values = addfromparents(element['class'],'accepted_data') + if 'Feature' in values: # if Feature is acceptable, ANY feature will be - value += ('AbstractFeature',) + values.add('AbstractFeature') + values.remove('Feature') + if 'WordReference' in values: + for e in sorted(flattenclasses(spec['wrefables'])): + values.add(e) + if 'Hiddenword' in values: + # if Hiddenword is acceptable, ANY AbstractWord will be + values.add('AbstractWord') + values.remove('Hiddenword') + if 'Word' in values: + # if Word is acceptable, ANY AbstractWord will be + values.add('AbstractWord') + values.remove('Word') + value = tuple(sorted(values)) if ('textcontainer' in element['properties'] and element['properties']['textcontainer']) or ('phoncontainer' in element['properties'] and element['properties']['phoncontainer']): value += ('XmlText',) - if 'WordReference' in value: - value += tuple( e for e in sorted(flattenclasses(spec['wrefables'])) ) if 'Abstract' not in cls: s += indent + outputvar(cls + '::PROPS.' + prop.upper(), value, target) + '\n' else: s += indent + to_upper_props(cls) + "." + outputvar(prop.upper(), value,target) + '\n' if 'Abstract' not in cls: - s += indent + 'element_props[' + cls + '_t] = &' + cls + '::PROPS;\n' + s += indent + 'element_props[ElementType::' + cls + '_t] = &' + cls + '::PROPS;\n' else: - s += indent + 'element_props[' + cls + '_t] = &' + to_upper_props(cls) + ';\n' + s += indent + 'element_props[ElementType::' + cls + '_t] = &' + to_upper_props(cls) + ';\n' done[cls] = True return s @@ -449,7 +472,7 @@ def outputblock(block, target, varname, args, indent = ""): s += indent + "class Attrib:\n" s += indent + " " + ", ".join(spec['attributes']) + " = range(" + str(len(spec['attributes'])) + ")" elif target == 'c++': - s += indent + "enum Attrib : int { NO_ATT=0, /// et_s_map = {\n" # first some types local to C++ - s += indent + " { BASE, \"FoLiA\" },\n" - s += indent + " { AbstractFeature_t, \"_AbstractFeature\" },\n" + s += indent + " { ElementType::BASE, \"FoLiA\" },\n" + s += indent + " { ElementType::AbstractFeature_t, \"_AbstractFeature\" },\n" + s += indent + " { ElementType::AbstractWord_t, \"_AbstractWord\" },\n" # and then the rest for element in elements: if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag']: - s += indent + " { " + element['class'] + '_t, "' + element['properties']['xmltag'] + '" },\n' + s += indent + " { ElementType::" + element['class'] + '_t, "' + element['properties']['xmltag'] + '" },\n' elif 'properties' in element and 'subset' in element['properties'] and element['properties']['subset']: if element['class'] == 'HeadFeature': - s += indent + " { HeadFeature_t, \"headfeature\" },\n" + s += indent + " { ElementType::HeadFeature_t, \"headfeature\" },\n" else: - s += indent + " { " + element['class'] + '_t, "' + element['properties']['subset'] + '" },\n' + s += indent + " { ElementType::" + element['class'] + '_t, "' + element['properties']['subset'] + '" },\n' else: - s += indent + " { " + element['class'] + '_t, "_' + element['class'] + '" },\n' - s += indent + ' { XmlComment_t, "_XmlComment" },\n' - s += indent + ' { ProcessingInstruction_t, "PI" },\n' - s += indent + ' { XmlText_t, "_XmlText" }\n' + s += indent + " { ElementType::" + element['class'] + '_t, "_' + element['class'] + '" },\n' + s += indent + ' { ElementType::XmlComment_t, "_XmlComment" },\n' + s += indent + ' { ElementType::ProcessingInstruction_t, "PI" },\n' + s += indent + ' { ElementType::XmlText_t, "_XmlText" }\n' s += indent + "};\n" elif target == 'rust': s += indent + "match " + args[0] + " {\n" @@ -695,22 +730,23 @@ def outputblock(block, target, varname, args, indent = ""): if target == 'c++': s += indent + "const map s_et_map = {\n" # first some types local to C++ - s += indent + " { \"FoLiA\", BASE },\n" - s += indent + " { \"_AbstractFeature\", AbstractFeature_t },\n" + s += indent + " { \"FoLiA\", ElementType::BASE },\n" + s += indent + " { \"_AbstractFeature\", ElementType::AbstractFeature_t },\n" + s += indent + " { \"_AbstractWord\", ElementType::AbstractWord_t },\n" # and then the rest for element in elements: if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag']: - s += indent + ' { "' + element['properties']['xmltag'] + '", ' + element['class'] + '_t },\n' + s += indent + ' { "' + element['properties']['xmltag'] + '", ElementType::' + element['class'] + '_t },\n' elif 'properties' in element and 'subset' in element['properties'] and element['properties']['subset']: if element['class'] == 'HeadFeature': - s += indent + " { \"headfeature\", HeadFeature_t },\n" + s += indent + " { \"headfeature\", ElementType::HeadFeature_t },\n" else: - s += indent + ' { "' + element['properties']['subset'] + '", ' + element['class'] + '_t },\n' + s += indent + ' { "' + element['properties']['subset'] + '", ElementType::' + element['class'] + '_t },\n' else: - s += indent + ' { "_' + element['class'] + '", ' + element['class'] + '_t },\n' - s += indent + ' { "_XmlComment", XmlComment_t },\n' - s += indent + ' { "PI", ProcessingInstruction_t },\n' - s += indent + ' { "_XmlText", XmlText_t }\n' + s += indent + ' { "_' + element['class'] + '", ElementType::' + element['class'] + '_t },\n' + s += indent + ' { "_XmlComment", ElementType::XmlComment_t },\n' + s += indent + ' { "PI", ElementType::ProcessingInstruction_t },\n' + s += indent + ' { "_XmlText", ElementType::XmlText_t }\n' s += indent + "};\n" elif target == 'rust': s += indent + "match " + args[0] + " {\n" @@ -772,28 +808,28 @@ def outputblock(block, target, varname, args, indent = ""): raise NotImplementedError("Block " + block + " not implemented for " + target) elif block == 'wrefables': if target == 'c++': - s += indent + "const set wrefables = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['wrefables'])) ]) + " };\n" + s += indent + "const set wrefables = { " + ", ".join([ 'ElementType::' + e + '_t' for e in sorted(flattenclasses(spec['wrefables'])) ]) + " };\n" elif target == 'python': s += indent + "wrefables = ( " + ", ".join(spec['wrefables']) + ",)\n" else: raise NotImplementedError("Block " + block + " not implemented for " + target) elif block == 'default_ignore': if target == 'c++': - s += indent + "const set default_ignore = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['default_ignore'])) ]) + " };\n" + s += indent + "const set default_ignore = { " + ", ".join([ 'ElementType::'+ e + '_t' for e in sorted(flattenclasses(spec['default_ignore'])) ]) + " };\n" elif target == 'python': s += indent + "default_ignore = ( " + ", ".join(spec['default_ignore']) + ",)\n" else: raise NotImplementedError("Block " + block + " not implemented for " + target) elif block == 'default_ignore_annotations': if target == 'c++': - s += indent + "const set default_ignore_annotations = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['default_ignore_annotations'])) ]) + " };\n" + s += indent + "const set default_ignore_annotations = { " + ", ".join([ 'ElementType::'+ e + '_t' for e in sorted(flattenclasses(spec['default_ignore_annotations'])) ]) + " };\n" elif target == 'python': s += indent + "default_ignore_annotations = ( " + ", ".join(spec['default_ignore_annotations']) + ",)\n" else: raise NotImplementedError("Block " + block + " not implemented for " + target) elif block == 'default_ignore_structure': if target == 'c++': - s += indent + "const set default_ignore_structure = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['default_ignore_structure'])) ]) + " };\n" + s += indent + "const set default_ignore_structure = { " + ", ".join([ 'ElementType::' + e + '_t' for e in sorted(flattenclasses(spec['default_ignore_structure'])) ]) + " };\n" elif target == 'python': s += indent + "default_ignore_structure = ( " + ", ".join(spec['default_ignore_structure']) + ",)\n" else: @@ -809,7 +845,14 @@ def outputblock(block, target, varname, args, indent = ""): if "Feature" in parentset: parentset.remove("Feature") parentset.append("AbstractFeature") - s += indent + " { " + child + '_t' + ", { " + ",".join([p + '_t' for p in parentset ]) + " } },\n" + # Word is special too, as we are splitting this in Word + # and AbstractWord, which is the parent of every *Word + if child == "Word" or child == "Hiddenword": + parentset.append("AbstractWord") + if "Word" in parentset: + parentset.remove("Word") + parentset.append("AbstractWord") + s += indent + " { " + 'ElementType::' + child + '_t' + ", { " + ",".join(['ElementType::' + p + '_t' for p in parentset ]) + " } },\n" s += indent + "};\n"; else: raise NotImplementedError("Block " + block + " not implemented for " + target)