Skip to content

Commit

Permalink
Squashed commit of the following:
Browse files Browse the repository at this point in the history
    in C++ all enum types are 'enum class' now.
    added AbstractWord as common parent of Word and Hiddenword
  • Loading branch information
kosloot committed Dec 5, 2024
1 parent 2c210c6 commit 2582bdf
Showing 1 changed file with 87 additions and 44 deletions.
131 changes: 87 additions & 44 deletions foliatools/foliaspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def outputvar(var, value, target, declare = False):
if value is None:
if declare: raise NotImplementedError("Declare not supported for None values")
if varname in ('REQUIRED_ATTRIBS','OPTIONAL_ATTRIBS'):
return var + ' = NO_ATT;'
return var + ' = Attrib::NO_ATT;'
elif varname == 'ANNOTATIONTYPE':
return var + ' = AnnotationType::NO_ANN;'
elif varname in ('XMLTAG','TEXTDELIMITER'):
Expand Down Expand Up @@ -176,10 +176,10 @@ def outputvar(var, value, target, declare = False):
else:
typedeclaration = ''
operator = '+='
value = [ x + '_t' for x in value ]
value = [ 'ElementType::' + x + '_t' for x in value ]
return typedeclaration + var + ' ' + operator + ' {' + ', '.join(value) + '};'
elif all([ x in spec['attributes'] for x in value ]):
return var + ' = ' + '|'.join(value) + ';'
return var + ' = Attrib::' + '|Attrib::'.join(value) + ';'
else:
return typedeclaration + var + ' = { ' + ', '.join([ '"' + x + '"' for x in value if x]) + ', };'
else:
Expand Down Expand Up @@ -306,31 +306,43 @@ def setelementproperties_cpp(element,indent, defer,done):
# element.
# for C++ we split them
s += indent + to_upper_props(cls) + ' = ABSTRACT_HIGHER_ORDER_ANNOTATION_PROPERTIES;\n'
if cls == 'AbstractWord':
# this is from a pseudo AbstractWord element created special for C++
# the specification has Word both as an abstract base AND as an
# element.
# for C++ we split them
s += indent + to_upper_props(cls) + ' = ABSTRACT_STRUCTURE_ELEMENT_PROPERTIES;\n'
if cls in parents:
for parent in parents[cls]:
if parent not in done:
defer[parent].append(element)
return None
else:
if parent == 'Feature' or cls == 'Feature':
# another trci needed to split Feature
# another trick needed to split Feature
parent = 'AbstractFeature'
if cls == 'Word':
# another trick needed to split Word
parent = 'AbstractWord'
if cls == 'Hiddenword':
# another trick needed to split Word
parent = 'AbstractWord'
if 'Abstract' not in cls:
if 'Abstract' not in parent:
s += indent + cls + '::PROPS = ' + parent + '::PROPS;\n'
else:
s += indent + cls + '::PROPS = ' + to_upper_props(parent) + ';\n'
s += indent + 'abstract_parents[' + cls + '_t] = ' + parent + '_t;\n'
s += indent + 'abstract_parents[ElementType::' + cls + '_t] = ElementType::' + parent + '_t;\n'

else:
if 'Abstract' in parent:
s += indent + to_upper_props(cls) + ' = ' + to_upper_props( parent ) + ';\n'

break
if 'Abstract' not in cls:
s += indent + cls + '::PROPS.ELEMENT_ID = ' + cls + '_t;\n'
s += indent + cls + '::PROPS.ELEMENT_ID = ElementType::' + cls + '_t;\n'
else:
s += indent + to_upper_props(cls) + '.ELEMENT_ID = ' + cls + '_t;\n'
s += indent + to_upper_props(cls) + '.ELEMENT_ID = ElementType::' + cls + '_t;\n'
if 'properties' in element:
for prop, value in sorted(element['properties'].items()):
if target not in skip_properties or prop not in skip_properties[target]:
Expand All @@ -343,22 +355,33 @@ def setelementproperties_cpp(element,indent, defer,done):
value = 'feat'
prop = 'subset'
elif prop == 'accepted_data':
value = tuple(sorted(addfromparents(element['class'],'accepted_data')))
if 'Feature' in value:
values = addfromparents(element['class'],'accepted_data')
if 'Feature' in values:
# if Feature is acceptable, ANY feature will be
value += ('AbstractFeature',)
values.add('AbstractFeature')
values.remove('Feature')
if 'WordReference' in values:
for e in sorted(flattenclasses(spec['wrefables'])):
values.add(e)
if 'Hiddenword' in values:
# if Hiddenword is acceptable, ANY AbstractWord will be
values.add('AbstractWord')
values.remove('Hiddenword')
if 'Word' in values:
# if Word is acceptable, ANY AbstractWord will be
values.add('AbstractWord')
values.remove('Word')
value = tuple(sorted(values))
if ('textcontainer' in element['properties'] and element['properties']['textcontainer']) or ('phoncontainer' in element['properties'] and element['properties']['phoncontainer']):
value += ('XmlText',)
if 'WordReference' in value:
value += tuple( e for e in sorted(flattenclasses(spec['wrefables'])) )
if 'Abstract' not in cls:
s += indent + outputvar(cls + '::PROPS.' + prop.upper(), value, target) + '\n'
else:
s += indent + to_upper_props(cls) + "." + outputvar(prop.upper(), value,target) + '\n'
if 'Abstract' not in cls:
s += indent + 'element_props[' + cls + '_t] = &' + cls + '::PROPS;\n'
s += indent + 'element_props[ElementType::' + cls + '_t] = &' + cls + '::PROPS;\n'
else:
s += indent + 'element_props[' + cls + '_t] = &' + to_upper_props(cls) + ';\n'
s += indent + 'element_props[ElementType::' + cls + '_t] = &' + to_upper_props(cls) + ';\n'
done[cls] = True
return s

Expand Down Expand Up @@ -449,7 +472,7 @@ def outputblock(block, target, varname, args, indent = ""):
s += indent + "class Attrib:\n"
s += indent + " " + ", ".join(spec['attributes']) + " = range(" + str(len(spec['attributes'])) + ")"
elif target == 'c++':
s += indent + "enum Attrib : int { NO_ATT=0, ///<No attribute\n"
s += indent + "enum class Attrib : int { NO_ATT=0, ///<No attribute\n"
value = 1
for attrib in spec['attributes']:
s += attrib + '=' + str(value) + ', '
Expand All @@ -464,7 +487,7 @@ def outputblock(block, target, varname, args, indent = ""):
elif block == 'elementtype':
if target == 'c++':
# C++ has some extra element-types not in the specification
s += indent + "enum ElementType : unsigned int { BASE=0, AbstractFeature_t, "
s += indent + "enum class ElementType : unsigned int { BASE=0, AbstractFeature_t, AbstractWord_t, "
s += ", ".join([ e + '_t' for e in elementnames]) + ", ProcessingInstruction_t, XmlComment_t, XmlText_t, LastElement };\n"
elif target == 'rust':
s += indent + "pub enum ElementType { " + ", ".join([ e for e in elementnames if not e.startswith('Abstract')]) + " }\n"
Expand All @@ -480,7 +503,7 @@ def outputblock(block, target, varname, args, indent = ""):
s += indent + "class AnnotationType:\n"
s += indent + " " + ", ".join(spec['annotationtype']) + " = range(" + str(len(spec['annotationtype'])) + ")"
elif target == 'c++':
s += indent + "enum AnnotationType : int { NO_ANN, ///<No type dummy\n"
s += indent + "enum class AnnotationType : int { NO_ANN, ///<No type dummy\n"
for t in spec['annotationtype']:
s += " " + t + ","
if t.lower() in spec['annotationtype_doc']:
Expand All @@ -494,9 +517,9 @@ def outputblock(block, target, varname, args, indent = ""):
raise NotImplementedError("Block " + block + " not implemented for " + target)
elif block == 'defaultproperties':
if target == 'c++':
s += indent + "ELEMENT_ID = BASE;\n"
s += indent + "ACCEPTED_DATA.insert(XmlComment_t);\n"
s += indent + "ACCEPTED_DATA.insert(ProcessingInstruction_t);\n"
s += indent + "ELEMENT_ID = ElementType::BASE;\n"
s += indent + "ACCEPTED_DATA.insert(ElementType::XmlComment_t);\n"
s += indent + "ACCEPTED_DATA.insert(ElementType::ProcessingInstruction_t);\n"
for prop, value in sorted(spec['defaultproperties'].items()):
if target not in skip_properties or prop not in skip_properties[target]:
s += indent + outputvar( prop.upper(), value, target) + '\n'
Expand Down Expand Up @@ -535,6 +558,17 @@ def outputblock(block, target, varname, args, indent = ""):
af_element["properties"] = {}
af_element["properties"]["label"] = "AbstractFeature"
s = setelementproperties_cpp(af_element,indent,defer,done)
#
# also we smuggle in a AbstractWord element, which will be
# the parent of all *Word elements
#
af_element = {}
af_element["class"] = "AbstractWord"
af_element["properties"] = {}
af_element["properties"]["label"] = "AbstractWord"
af_element["properties"]["printable"] = True
af_element["properties"]["speakable"] = True
s += setelementproperties_cpp(af_element,indent,defer,done)
for element in elements:
output = setelementproperties_cpp(element,indent, defer,done)
if output:
Expand Down Expand Up @@ -625,7 +659,7 @@ def outputblock(block, target, varname, args, indent = ""):
for element in elements:
if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag'] and 'annotationtype' in element['properties']:
if 'primaryelement' in element['properties'] and not element['properties']['primaryelement']: continue #not primary, skip
s += indent + " { AnnotationType::" + element['properties']['annotationtype'] + ', ' + element['class'] + '_t },\n'
s += indent + " { AnnotationType::" + element['properties']['annotationtype'] + ', ' + 'ElementType::' + element['class'] + '_t },\n'
s += indent + "};\n"
elif target == 'rust':
s += indent + "match " + args[0] + " {\n"
Expand Down Expand Up @@ -661,22 +695,23 @@ def outputblock(block, target, varname, args, indent = ""):
if target == 'c++':
s += indent + "const map<ElementType,string> et_s_map = {\n"
# first some types local to C++
s += indent + " { BASE, \"FoLiA\" },\n"
s += indent + " { AbstractFeature_t, \"_AbstractFeature\" },\n"
s += indent + " { ElementType::BASE, \"FoLiA\" },\n"
s += indent + " { ElementType::AbstractFeature_t, \"_AbstractFeature\" },\n"
s += indent + " { ElementType::AbstractWord_t, \"_AbstractWord\" },\n"
# and then the rest
for element in elements:
if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag']:
s += indent + " { " + element['class'] + '_t, "' + element['properties']['xmltag'] + '" },\n'
s += indent + " { ElementType::" + element['class'] + '_t, "' + element['properties']['xmltag'] + '" },\n'
elif 'properties' in element and 'subset' in element['properties'] and element['properties']['subset']:
if element['class'] == 'HeadFeature':
s += indent + " { HeadFeature_t, \"headfeature\" },\n"
s += indent + " { ElementType::HeadFeature_t, \"headfeature\" },\n"
else:
s += indent + " { " + element['class'] + '_t, "' + element['properties']['subset'] + '" },\n'
s += indent + " { ElementType::" + element['class'] + '_t, "' + element['properties']['subset'] + '" },\n'
else:
s += indent + " { " + element['class'] + '_t, "_' + element['class'] + '" },\n'
s += indent + ' { XmlComment_t, "_XmlComment" },\n'
s += indent + ' { ProcessingInstruction_t, "PI" },\n'
s += indent + ' { XmlText_t, "_XmlText" }\n'
s += indent + " { ElementType::" + element['class'] + '_t, "_' + element['class'] + '" },\n'
s += indent + ' { ElementType::XmlComment_t, "_XmlComment" },\n'
s += indent + ' { ElementType::ProcessingInstruction_t, "PI" },\n'
s += indent + ' { ElementType::XmlText_t, "_XmlText" }\n'
s += indent + "};\n"
elif target == 'rust':
s += indent + "match " + args[0] + " {\n"
Expand All @@ -695,22 +730,23 @@ def outputblock(block, target, varname, args, indent = ""):
if target == 'c++':
s += indent + "const map<string,ElementType> s_et_map = {\n"
# first some types local to C++
s += indent + " { \"FoLiA\", BASE },\n"
s += indent + " { \"_AbstractFeature\", AbstractFeature_t },\n"
s += indent + " { \"FoLiA\", ElementType::BASE },\n"
s += indent + " { \"_AbstractFeature\", ElementType::AbstractFeature_t },\n"
s += indent + " { \"_AbstractWord\", ElementType::AbstractWord_t },\n"
# and then the rest
for element in elements:
if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag']:
s += indent + ' { "' + element['properties']['xmltag'] + '", ' + element['class'] + '_t },\n'
s += indent + ' { "' + element['properties']['xmltag'] + '", ElementType::' + element['class'] + '_t },\n'
elif 'properties' in element and 'subset' in element['properties'] and element['properties']['subset']:
if element['class'] == 'HeadFeature':
s += indent + " { \"headfeature\", HeadFeature_t },\n"
s += indent + " { \"headfeature\", ElementType::HeadFeature_t },\n"
else:
s += indent + ' { "' + element['properties']['subset'] + '", ' + element['class'] + '_t },\n'
s += indent + ' { "' + element['properties']['subset'] + '", ElementType::' + element['class'] + '_t },\n'
else:
s += indent + ' { "_' + element['class'] + '", ' + element['class'] + '_t },\n'
s += indent + ' { "_XmlComment", XmlComment_t },\n'
s += indent + ' { "PI", ProcessingInstruction_t },\n'
s += indent + ' { "_XmlText", XmlText_t }\n'
s += indent + ' { "_' + element['class'] + '", ElementType::' + element['class'] + '_t },\n'
s += indent + ' { "_XmlComment", ElementType::XmlComment_t },\n'
s += indent + ' { "PI", ElementType::ProcessingInstruction_t },\n'
s += indent + ' { "_XmlText", ElementType::XmlText_t }\n'
s += indent + "};\n"
elif target == 'rust':
s += indent + "match " + args[0] + " {\n"
Expand Down Expand Up @@ -772,28 +808,28 @@ def outputblock(block, target, varname, args, indent = ""):
raise NotImplementedError("Block " + block + " not implemented for " + target)
elif block == 'wrefables':
if target == 'c++':
s += indent + "const set<ElementType> wrefables = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['wrefables'])) ]) + " };\n"
s += indent + "const set<ElementType> wrefables = { " + ", ".join([ 'ElementType::' + e + '_t' for e in sorted(flattenclasses(spec['wrefables'])) ]) + " };\n"
elif target == 'python':
s += indent + "wrefables = ( " + ", ".join(spec['wrefables']) + ",)\n"
else:
raise NotImplementedError("Block " + block + " not implemented for " + target)
elif block == 'default_ignore':
if target == 'c++':
s += indent + "const set<ElementType> default_ignore = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['default_ignore'])) ]) + " };\n"
s += indent + "const set<ElementType> default_ignore = { " + ", ".join([ 'ElementType::'+ e + '_t' for e in sorted(flattenclasses(spec['default_ignore'])) ]) + " };\n"
elif target == 'python':
s += indent + "default_ignore = ( " + ", ".join(spec['default_ignore']) + ",)\n"
else:
raise NotImplementedError("Block " + block + " not implemented for " + target)
elif block == 'default_ignore_annotations':
if target == 'c++':
s += indent + "const set<ElementType> default_ignore_annotations = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['default_ignore_annotations'])) ]) + " };\n"
s += indent + "const set<ElementType> default_ignore_annotations = { " + ", ".join([ 'ElementType::'+ e + '_t' for e in sorted(flattenclasses(spec['default_ignore_annotations'])) ]) + " };\n"
elif target == 'python':
s += indent + "default_ignore_annotations = ( " + ", ".join(spec['default_ignore_annotations']) + ",)\n"
else:
raise NotImplementedError("Block " + block + " not implemented for " + target)
elif block == 'default_ignore_structure':
if target == 'c++':
s += indent + "const set<ElementType> default_ignore_structure = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['default_ignore_structure'])) ]) + " };\n"
s += indent + "const set<ElementType> default_ignore_structure = { " + ", ".join([ 'ElementType::' + e + '_t' for e in sorted(flattenclasses(spec['default_ignore_structure'])) ]) + " };\n"
elif target == 'python':
s += indent + "default_ignore_structure = ( " + ", ".join(spec['default_ignore_structure']) + ",)\n"
else:
Expand All @@ -809,7 +845,14 @@ def outputblock(block, target, varname, args, indent = ""):
if "Feature" in parentset:
parentset.remove("Feature")
parentset.append("AbstractFeature")
s += indent + " { " + child + '_t' + ", { " + ",".join([p + '_t' for p in parentset ]) + " } },\n"
# Word is special too, as we are splitting this in Word
# and AbstractWord, which is the parent of every *Word
if child == "Word" or child == "Hiddenword":
parentset.append("AbstractWord")
if "Word" in parentset:
parentset.remove("Word")
parentset.append("AbstractWord")
s += indent + " { " + 'ElementType::' + child + '_t' + ", { " + ",".join(['ElementType::' + p + '_t' for p in parentset ]) + " } },\n"
s += indent + "};\n";
else:
raise NotImplementedError("Block " + block + " not implemented for " + target)
Expand Down

0 comments on commit 2582bdf

Please sign in to comment.