diff --git a/src/main/java/org/grobid/core/engines/DatasetParser.java b/src/main/java/org/grobid/core/engines/DatasetParser.java index 217e26a..e62cb34 100644 --- a/src/main/java/org/grobid/core/engines/DatasetParser.java +++ b/src/main/java/org/grobid/core/engines/DatasetParser.java @@ -1590,7 +1590,7 @@ public Pair>, List> processTEIDocument(org.w3c.do //Extract relevant section from the TEI // Title, abstract, keywords - // If we process the TEI, at this point the document should be already segmented correctly. + // TODO: remove this If we process the TEI, at this point the document should be already segmented correctly. boolean extractParagraphs = false; XPath xPath = XPathFactory.newInstance().newXPath(); @@ -1770,6 +1770,9 @@ public Pair>, List> processTEIDocument(org.w3c.do localSequence.setRelevantSectionsNamedDatasets(true); localSequence.setRelevantSectionsImplicitDatasets(false); } + + Map> referencesInText = XMLUtilities.getTextNoRefMarkersAndMarkerPositions((org.w3c.dom.Element) paragraphAnnex, 0).getRight(); + localSequence.setReferences(referencesInText); } } @@ -1803,6 +1806,9 @@ public Pair>, List> processTEIDocument(org.w3c.do localSequence.setRelevantSectionsImplicitDatasets(true); selectedSequences.add(localSequence); availabilitySequences.add(localSequence); + + Map> referencesInText = XMLUtilities.getTextNoRefMarkersAndMarkerPositions((org.w3c.dom.Element) item, 0).getRight(); + localSequence.setReferences(referencesInText); } } catch (XPathExpressionException e) { @@ -1854,6 +1860,9 @@ public Pair>, List> processTEIDocument(org.w3c.do localSequence.setRelevantSectionsNamedDatasets(true); localSequence.setRelevantSectionsImplicitDatasets(false); } + + Map> referencesInText = XMLUtilities.getTextNoRefMarkersAndMarkerPositions((org.w3c.dom.Element) paragraphAnnex, 0).getRight(); + localSequence.setReferences(referencesInText); } } @@ -1881,6 +1890,9 @@ public Pair>, List> processTEIDocument(org.w3c.do localSequence.setRelevantSectionsImplicitDatasets(true); selectedSequences.add(localSequence); availabilitySequences.add(localSequence); + + Map> referencesInText = XMLUtilities.getTextNoRefMarkersAndMarkerPositions((org.w3c.dom.Element) item, 0).getRight(); + localSequence.setReferences(referencesInText); } } catch (XPathExpressionException e) { @@ -1911,6 +1923,9 @@ public Pair>, List> processTEIDocument(org.w3c.do localSequence.setRelevantSectionsImplicitDatasets(false); selectedSequences.add(localSequence); availabilitySequences.add(localSequence); + + Map> referencesInText = XMLUtilities.getTextNoRefMarkersAndMarkerPositions((org.w3c.dom.Element) item, 0).getRight(); + localSequence.setReferences(referencesInText); } } catch (XPathExpressionException e) {