From 920323fa0cb5cf5dc1bfab1aed2df90918065b3d Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sun, 13 Oct 2024 12:24:52 +0200 Subject: [PATCH] fix xpath to fall back into div into TEI/back --- src/main/java/org/grobid/core/engines/DatasetParser.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/grobid/core/engines/DatasetParser.java b/src/main/java/org/grobid/core/engines/DatasetParser.java index e62cb34..4d6a4da 100644 --- a/src/main/java/org/grobid/core/engines/DatasetParser.java +++ b/src/main/java/org/grobid/core/engines/DatasetParser.java @@ -1873,7 +1873,9 @@ public Pair>, List> processTEIDocument(org.w3c.do try { - String expression = "//*[local-name() = 'text']/*[local-name() = 'back']/*[local-name() = 'div'][not(@type) or not(contains('" + String.join("|", specificSectionTypesAnnex) + "', concat('|', @type, '|')))]/*[local-name()='div']/*[local-name() = 'p']"; +// String expression = "//*[local-name() = 'text']/*[local-name() = 'back']/*[local-name() = 'div'][not(@type) or (not(contains(@type, 'availability')) and not(contains(@type, 'acknowledgement')) and not(contains(@type, 'funding')))]/*[local-name()='div']/*[local-name() = 'p']"; + + String expression = "//*[local-name() = 'text']/*[local-name() = 'back']/*[local-name() = 'div'][not(@type) or (" + specificSectionTypesAnnex.stream().map(type -> "not(contains(@type, '" + type + "'))").collect(Collectors.joining(" and ")) + ")]/*[local-name()='div']/*[local-name() = 'p']"; expression = extractParagraphs ? expression : expression + "/*[local-name() = 's']"; org.w3c.dom.NodeList annexNodeList = (org.w3c.dom.NodeList) xPath.evaluate(expression, doc,