Squashed commit of the following:

in C++ all enum types are 'enum class' now. added AbstractWord as common parent of Word and Hiddenword
proycon · Dec 5, 2024 · 2582bdf · 2582bdf
1 parent 2c210c6
commit 2582bdf
Showing 1 changed file with 87 additions and 44 deletions.
diff --git a/foliatools/foliaspec.py b/foliatools/foliaspec.py
@@ -144,7 +144,7 @@ def outputvar(var, value, target, declare = False):
         if value is None:
             if declare: raise NotImplementedError("Declare not supported for None values")
             if varname in ('REQUIRED_ATTRIBS','OPTIONAL_ATTRIBS'):
-                return var + ' = NO_ATT;'
+                return var + ' = Attrib::NO_ATT;'
             elif varname == 'ANNOTATIONTYPE':
                 return var + ' = AnnotationType::NO_ANN;'
             elif varname in ('XMLTAG','TEXTDELIMITER'):
@@ -176,10 +176,10 @@ def outputvar(var, value, target, declare = False):
                 else:
                     typedeclaration = ''
                     operator = '+='
-                value = [ x + '_t' for x in value ]
+                value = [ 'ElementType::' + x + '_t' for x in value ]
                 return typedeclaration + var + ' ' + operator + ' {' + ', '.join(value) + '};'
             elif all([ x in spec['attributes'] for x in value ]):
-                return var + ' = ' + '|'.join(value) + ';'
+                return var + ' = Attrib::' + '|Attrib::'.join(value) + ';'
             else:
                 return typedeclaration + var + ' = { ' + ', '.join([ '"' + x + '"' for x in value if x]) + ', };'
         else:
@@ -306,31 +306,43 @@ def setelementproperties_cpp(element,indent, defer,done):
         # element.
         # for C++ we split them
         s += indent + to_upper_props(cls) + ' = ABSTRACT_HIGHER_ORDER_ANNOTATION_PROPERTIES;\n'
+    if cls == 'AbstractWord':
+        # this is from a pseudo AbstractWord element created special for C++
+        # the specification has Word both as an abstract base AND as an
+        # element.
+        # for C++ we split them
+        s += indent + to_upper_props(cls) + ' = ABSTRACT_STRUCTURE_ELEMENT_PROPERTIES;\n'
     if cls in parents:
         for parent in parents[cls]:
             if parent not in done:
                 defer[parent].append(element)
                 return None
             else:
                 if parent == 'Feature' or cls == 'Feature':
-                    # another trci needed to split Feature
+                    # another trick needed to split Feature
                    parent = 'AbstractFeature'
+                if cls == 'Word':
+                    # another trick needed to split Word
+                   parent = 'AbstractWord'
+                if cls == 'Hiddenword':
+                    # another trick needed to split Word
+                   parent = 'AbstractWord'
                 if 'Abstract' not in cls:
                     if 'Abstract' not in parent:
                         s += indent + cls + '::PROPS = ' + parent + '::PROPS;\n'
                     else:
                         s += indent + cls + '::PROPS = ' + to_upper_props(parent) + ';\n'
-                        s += indent + 'abstract_parents[' + cls + '_t] = ' + parent + '_t;\n'
+                        s += indent + 'abstract_parents[ElementType::' + cls + '_t] = ElementType::' + parent + '_t;\n'
 
                 else:
                     if 'Abstract' in parent:
                         s += indent + to_upper_props(cls) + ' = ' + to_upper_props( parent ) + ';\n'
 
             break
     if 'Abstract' not in cls:
-        s += indent + cls + '::PROPS.ELEMENT_ID = ' + cls + '_t;\n'
+        s += indent + cls + '::PROPS.ELEMENT_ID = ElementType::' + cls + '_t;\n'
     else:
-        s += indent + to_upper_props(cls) + '.ELEMENT_ID = ' + cls + '_t;\n'
+        s += indent + to_upper_props(cls) + '.ELEMENT_ID = ElementType::' + cls + '_t;\n'
     if 'properties' in element:
         for prop, value in sorted(element['properties'].items()):
             if target not in skip_properties or prop not in skip_properties[target]:
@@ -343,22 +355,33 @@ def setelementproperties_cpp(element,indent, defer,done):
                         value = 'feat'
                         prop = 'subset'
                 elif prop == 'accepted_data':
-                    value = tuple(sorted(addfromparents(element['class'],'accepted_data')))
-                    if 'Feature' in value:
+                    values = addfromparents(element['class'],'accepted_data')
+                    if 'Feature' in values:
                         # if Feature is acceptable, ANY feature will be
-                        value += ('AbstractFeature',)
+                        values.add('AbstractFeature')
+                        values.remove('Feature')
+                    if 'WordReference' in values:
+                        for e in sorted(flattenclasses(spec['wrefables'])):
+                            values.add(e)
+                    if 'Hiddenword' in values:
+                        # if Hiddenword is acceptable, ANY AbstractWord will be
+                        values.add('AbstractWord')
+                        values.remove('Hiddenword')
+                    if 'Word' in values:
+                        # if Word is acceptable, ANY AbstractWord will be
+                        values.add('AbstractWord')
+                        values.remove('Word')
+                    value = tuple(sorted(values))
                     if ('textcontainer' in element['properties'] and element['properties']['textcontainer']) or ('phoncontainer' in element['properties'] and element['properties']['phoncontainer']):
                         value += ('XmlText',)
-                    if 'WordReference' in value:
-                        value += tuple( e  for e in sorted(flattenclasses(spec['wrefables'])) )
                 if 'Abstract' not in cls:
                     s += indent + outputvar(cls + '::PROPS.' + prop.upper(),  value, target) + '\n'
                 else:
                     s += indent + to_upper_props(cls) + "." + outputvar(prop.upper(), value,target) + '\n'
     if 'Abstract' not in cls:
-        s += indent + 'element_props[' + cls + '_t] = &' + cls + '::PROPS;\n'
+        s += indent + 'element_props[ElementType::' + cls + '_t] = &' + cls + '::PROPS;\n'
     else:
-        s += indent + 'element_props[' + cls + '_t] = &' + to_upper_props(cls) + ';\n'
+        s += indent + 'element_props[ElementType::' + cls + '_t] = &' + to_upper_props(cls) + ';\n'
     done[cls] = True
     return s
 
@@ -449,7 +472,7 @@ def outputblock(block, target, varname, args, indent = ""):
             s += indent + "class Attrib:\n"
             s += indent + "    " +  ", ".join(spec['attributes']) + " = range(" + str(len(spec['attributes'])) + ")"
         elif target == 'c++':
-            s += indent + "enum Attrib : int { NO_ATT=0, ///<No attribute\n"
+            s += indent + "enum class Attrib : int { NO_ATT=0, ///<No attribute\n"
             value = 1
             for attrib in spec['attributes']:
                 s +=  attrib + '=' + str(value) + ', '
@@ -464,7 +487,7 @@ def outputblock(block, target, varname, args, indent = ""):
     elif block == 'elementtype':
         if target == 'c++':
             # C++ has some extra element-types not in the specification
-            s += indent + "enum ElementType : unsigned int { BASE=0, AbstractFeature_t, "
+            s += indent + "enum class ElementType : unsigned int { BASE=0, AbstractFeature_t, AbstractWord_t, "
             s += ", ".join([ e + '_t' for e in elementnames]) + ", ProcessingInstruction_t, XmlComment_t, XmlText_t,  LastElement };\n"
         elif target == 'rust':
             s += indent + "pub enum ElementType { " + ", ".join([ e for e in elementnames if not e.startswith('Abstract')]) + " }\n"
@@ -480,7 +503,7 @@ def outputblock(block, target, varname, args, indent = ""):
             s += indent + "class AnnotationType:\n"
             s += indent + "    " +  ", ".join(spec['annotationtype']) + " = range(" + str(len(spec['annotationtype'])) + ")"
         elif target == 'c++':
-            s += indent + "enum AnnotationType : int { NO_ANN, ///<No type dummy\n"
+            s += indent + "enum class AnnotationType : int { NO_ANN, ///<No type dummy\n"
             for t in spec['annotationtype']:
                 s += "    " + t + ","
                 if t.lower() in spec['annotationtype_doc']:
@@ -494,9 +517,9 @@ def outputblock(block, target, varname, args, indent = ""):
             raise NotImplementedError("Block " + block + " not implemented for " + target)
     elif block == 'defaultproperties':
         if target == 'c++':
-            s += indent + "ELEMENT_ID = BASE;\n"
-            s += indent + "ACCEPTED_DATA.insert(XmlComment_t);\n"
-            s += indent + "ACCEPTED_DATA.insert(ProcessingInstruction_t);\n"
+            s += indent + "ELEMENT_ID = ElementType::BASE;\n"
+            s += indent + "ACCEPTED_DATA.insert(ElementType::XmlComment_t);\n"
+            s += indent + "ACCEPTED_DATA.insert(ElementType::ProcessingInstruction_t);\n"
             for prop, value in sorted(spec['defaultproperties'].items()):
                 if target not in skip_properties or prop not in skip_properties[target]:
                     s += indent + outputvar( prop.upper(),  value, target) + '\n'
@@ -535,6 +558,17 @@ def outputblock(block, target, varname, args, indent = ""):
             af_element["properties"] = {}
             af_element["properties"]["label"] = "AbstractFeature"
             s = setelementproperties_cpp(af_element,indent,defer,done)
+            #
+            # also we smuggle in a AbstractWord element, which will be
+            # the parent of all *Word elements
+            #
+            af_element = {}
+            af_element["class"] = "AbstractWord"
+            af_element["properties"] = {}
+            af_element["properties"]["label"] = "AbstractWord"
+            af_element["properties"]["printable"] = True
+            af_element["properties"]["speakable"] = True
+            s += setelementproperties_cpp(af_element,indent,defer,done)
             for element in elements:
                 output = setelementproperties_cpp(element,indent, defer,done)
                 if output:
@@ -625,7 +659,7 @@ def outputblock(block, target, varname, args, indent = ""):
             for element in elements:
                 if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag'] and 'annotationtype' in element['properties']:
                     if 'primaryelement' in element['properties'] and not element['properties']['primaryelement']: continue #not primary, skip
-                    s += indent + "  {  AnnotationType::" + element['properties']['annotationtype'] + ', ' + element['class'] + '_t },\n'
+                    s += indent + "  {  AnnotationType::" + element['properties']['annotationtype'] + ', ' + 'ElementType::' + element['class'] + '_t },\n'
             s += indent + "};\n"
         elif target == 'rust':
             s += indent + "match " + args[0] + " {\n"
@@ -661,22 +695,23 @@ def outputblock(block, target, varname, args, indent = ""):
         if target == 'c++':
             s += indent + "const map<ElementType,string> et_s_map = {\n"
             # first some types local to C++
-            s += indent + "  { BASE, \"FoLiA\" },\n"
-            s += indent + "  { AbstractFeature_t, \"_AbstractFeature\" },\n"
+            s += indent + "  { ElementType::BASE, \"FoLiA\" },\n"
+            s += indent + "  { ElementType::AbstractFeature_t, \"_AbstractFeature\" },\n"
+            s += indent + "  { ElementType::AbstractWord_t, \"_AbstractWord\" },\n"
             # and then the rest
             for element in elements:
                 if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag']:
-                    s += indent + "  { " + element['class'] + '_t,  "' + element['properties']['xmltag'] + '" },\n'
+                    s += indent + "  { ElementType::" + element['class'] + '_t,  "' + element['properties']['xmltag'] + '" },\n'
                 elif 'properties' in element and 'subset' in element['properties'] and element['properties']['subset']:
                     if element['class'] == 'HeadFeature':
-                        s += indent + "  { HeadFeature_t,  \"headfeature\" },\n"
+                        s += indent + "  { ElementType::HeadFeature_t,  \"headfeature\" },\n"
                     else:
-                        s += indent + "  { " + element['class'] + '_t,  "' + element['properties']['subset'] + '" },\n'
+                        s += indent + "  { ElementType::" + element['class'] + '_t,  "' + element['properties']['subset'] + '" },\n'
                 else:
-                    s += indent + "  { " + element['class'] + '_t,  "_' + element['class'] + '" },\n'
-            s += indent + '  { XmlComment_t, "_XmlComment" },\n'
-            s += indent + '  { ProcessingInstruction_t, "PI" },\n'
-            s += indent + '  { XmlText_t, "_XmlText" }\n'
+                    s += indent + "  { ElementType::" + element['class'] + '_t,  "_' + element['class'] + '" },\n'
+            s += indent + '  { ElementType::XmlComment_t, "_XmlComment" },\n'
+            s += indent + '  { ElementType::ProcessingInstruction_t, "PI" },\n'
+            s += indent + '  { ElementType::XmlText_t, "_XmlText" }\n'
             s += indent + "};\n"
         elif target == 'rust':
             s += indent + "match " + args[0] + " {\n"
@@ -695,22 +730,23 @@ def outputblock(block, target, varname, args, indent = ""):
         if target == 'c++':
             s += indent + "const map<string,ElementType> s_et_map = {\n"
             # first some types local to C++
-            s += indent + "  { \"FoLiA\", BASE },\n"
-            s += indent + "  { \"_AbstractFeature\", AbstractFeature_t },\n"
+            s += indent + "  { \"FoLiA\", ElementType::BASE },\n"
+            s += indent + "  { \"_AbstractFeature\", ElementType::AbstractFeature_t },\n"
+            s += indent + "  { \"_AbstractWord\", ElementType::AbstractWord_t },\n"
             # and then the rest
             for element in elements:
                 if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag']:
-                    s += indent + '  { "' + element['properties']['xmltag'] + '", ' + element['class'] + '_t  },\n'
+                    s += indent + '  { "' + element['properties']['xmltag'] + '", ElementType::' + element['class'] + '_t  },\n'
                 elif 'properties' in element and 'subset' in element['properties'] and element['properties']['subset']:
                     if element['class'] == 'HeadFeature':
-                        s += indent + "  { \"headfeature\", HeadFeature_t },\n"
+                        s += indent + "  { \"headfeature\", ElementType::HeadFeature_t },\n"
                     else:
-                        s += indent + '  { "' + element['properties']['subset'] + '", ' + element['class'] + '_t  },\n'
+                        s += indent + '  { "' + element['properties']['subset'] + '", ElementType::' + element['class'] + '_t  },\n'
                 else:
-                    s += indent + '  { "_' + element['class'] + '", ' + element['class'] + '_t  },\n'
-            s += indent + '  { "_XmlComment", XmlComment_t  },\n'
-            s += indent + '  { "PI", ProcessingInstruction_t  },\n'
-            s += indent + '  { "_XmlText", XmlText_t  }\n'
+                    s += indent + '  { "_' + element['class'] + '", ElementType::' + element['class'] + '_t  },\n'
+            s += indent + '  { "_XmlComment", ElementType::XmlComment_t  },\n'
+            s += indent + '  { "PI", ElementType::ProcessingInstruction_t  },\n'
+            s += indent + '  { "_XmlText", ElementType::XmlText_t  }\n'
             s += indent + "};\n"
         elif target == 'rust':
             s += indent + "match " + args[0] + " {\n"
@@ -772,28 +808,28 @@ def outputblock(block, target, varname, args, indent = ""):
             raise NotImplementedError("Block " + block + " not implemented for " + target)
     elif block == 'wrefables':
         if target == 'c++':
-            s += indent + "const set<ElementType> wrefables = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['wrefables'])) ]) + " };\n"
+            s += indent + "const set<ElementType> wrefables = { " + ", ".join([ 'ElementType::' + e + '_t' for e in sorted(flattenclasses(spec['wrefables'])) ]) + " };\n"
         elif target == 'python':
             s += indent + "wrefables = ( " + ", ".join(spec['wrefables']) + ",)\n"
         else:
             raise NotImplementedError("Block " + block + " not implemented for " + target)
     elif block == 'default_ignore':
         if target == 'c++':
-            s += indent + "const set<ElementType> default_ignore = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['default_ignore'])) ]) + " };\n"
+            s += indent + "const set<ElementType> default_ignore = { " + ", ".join([ 'ElementType::'+ e + '_t' for e in sorted(flattenclasses(spec['default_ignore'])) ]) + " };\n"
         elif target == 'python':
             s += indent + "default_ignore = ( " + ", ".join(spec['default_ignore']) + ",)\n"
         else:
             raise NotImplementedError("Block " + block + " not implemented for " + target)
     elif block == 'default_ignore_annotations':
         if target == 'c++':
-            s += indent + "const set<ElementType> default_ignore_annotations = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['default_ignore_annotations'])) ]) + " };\n"
+            s += indent + "const set<ElementType> default_ignore_annotations = { " + ", ".join([ 'ElementType::'+ e + '_t' for e in sorted(flattenclasses(spec['default_ignore_annotations'])) ]) + " };\n"
         elif target == 'python':
             s += indent + "default_ignore_annotations = ( " + ", ".join(spec['default_ignore_annotations']) + ",)\n"
         else:
             raise NotImplementedError("Block " + block + " not implemented for " + target)
     elif block == 'default_ignore_structure':
         if target == 'c++':
-            s += indent + "const set<ElementType> default_ignore_structure = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['default_ignore_structure'])) ]) + " };\n"
+            s += indent + "const set<ElementType> default_ignore_structure = { " + ", ".join([ 'ElementType::' + e + '_t' for e in sorted(flattenclasses(spec['default_ignore_structure'])) ]) + " };\n"
         elif target == 'python':
             s += indent + "default_ignore_structure = ( " + ", ".join(spec['default_ignore_structure']) + ",)\n"
         else:
@@ -809,7 +845,14 @@ def outputblock(block, target, varname, args, indent = ""):
                 if "Feature" in parentset:
                     parentset.remove("Feature")
                     parentset.append("AbstractFeature")
-                s += indent + "   { " + child + '_t' + ", { " + ",".join([p + '_t' for p in parentset ]) + " } },\n"
+                # Word is special too, as we are splitting this in Word
+                # and AbstractWord, which is the parent of every *Word
+                if child == "Word" or child == "Hiddenword":
+                    parentset.append("AbstractWord")
+                if "Word" in parentset:
+                    parentset.remove("Word")
+                    parentset.append("AbstractWord")
+                s += indent + "   { " + 'ElementType::' + child + '_t' + ", { " + ",".join(['ElementType::' + p + '_t' for p in parentset ]) + " } },\n"
             s += indent + "};\n";
         else:
             raise NotImplementedError("Block " + block + " not implemented for " + target)