Skip to content

Commit

Permalink
fix xpath to fall back into div into TEI/back
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Oct 13, 2024
1 parent da6746c commit 920323f
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/main/java/org/grobid/core/engines/DatasetParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -1873,7 +1873,9 @@ public Pair<List<List<Dataset>>, List<BibDataSet>> processTEIDocument(org.w3c.do


try {
String expression = "//*[local-name() = 'text']/*[local-name() = 'back']/*[local-name() = 'div'][not(@type) or not(contains('" + String.join("|", specificSectionTypesAnnex) + "', concat('|', @type, '|')))]/*[local-name()='div']/*[local-name() = 'p']";
// String expression = "//*[local-name() = 'text']/*[local-name() = 'back']/*[local-name() = 'div'][not(@type) or (not(contains(@type, 'availability')) and not(contains(@type, 'acknowledgement')) and not(contains(@type, 'funding')))]/*[local-name()='div']/*[local-name() = 'p']";

// Select paragraphs of nested divs under TEI text/back whose parent div either has no @type or
// has a @type containing none of the specific annex section types. The per-type clauses must be
// joined with " and " to form a valid XPath boolean expression, e.g.
//   not(contains(@type, 'a')) and not(contains(@type, 'b'))
// (joining without a delimiter would concatenate the clauses into an unparsable expression).
String expression = "//*[local-name() = 'text']/*[local-name() = 'back']/*[local-name() = 'div'][not(@type) or (" + specificSectionTypesAnnex.stream().map(type -> "not(contains(@type, '" + type + "'))").collect(Collectors.joining(" and ")) + ")]/*[local-name()='div']/*[local-name() = 'p']";
expression = extractParagraphs ? expression : expression + "/*[local-name() = 's']";
org.w3c.dom.NodeList annexNodeList = (org.w3c.dom.NodeList) xPath.evaluate(expression,
doc,
Expand Down

0 comments on commit 920323f

Please sign in to comment.