cosmetics; also restrict the specific annex section types to "availability" and "data-availability"

(cherry picked from commit 0a5cedd)
DataSeer · Oct 22, 2024 · b18454b · b18454b
1 parent 774dd78
commit b18454b
Showing 1 changed file with 13 additions and 9 deletions.
diff --git a/src/main/java/org/grobid/core/engines/DatasetParser.java b/src/main/java/org/grobid/core/engines/DatasetParser.java
@@ -234,17 +234,17 @@ public List<List<Dataset>> processing(List<DatasetDocumentSequence> datasetDocum
                 for (Dataset entity : localDatasets) {
                     if (entity.getDatasetName() != null) {
                         String term = entity.getDatasetName().getNormalizedForm();
-                        if (term == null || term.length() == 0) {
-                            indexToBeFiltered.add(Integer.valueOf(k));
+                        if (StringUtils.isBlank(term)) {
+                            indexToBeFiltered.add(k);
                         } else if (DatastetLexicon.getInstance().isEnglishStopword(term)) {
-                            indexToBeFiltered.add(Integer.valueOf(k));
+                            indexToBeFiltered.add(k);
                         } else if (DatastetLexicon.getInstance().isBlackListedNamedDataset(term.toLowerCase())) {
-                            indexToBeFiltered.add(Integer.valueOf(k));
+                            indexToBeFiltered.add(k);
                         }
                     }
                     k++;
                 }
-                if (indexToBeFiltered.size() > 0) {
+                if (CollectionUtils.isNotEmpty(indexToBeFiltered)) {
                     for (int j = indexToBeFiltered.size() - 1; j >= 0; j--) {
                         localDatasets.remove(indexToBeFiltered.get(j).intValue());
                     }
@@ -1596,7 +1596,8 @@ public Pair<List<List<Dataset>>, List<BibDataSet>> processTEIDocument(org.w3c.do
         XPath xPath = XPathFactory.newInstance().newXPath();
 
         try {
-            org.w3c.dom.Node titleNode = (org.w3c.dom.Node) xPath.evaluate("//*[local-name() = 'titleStmt']/*[local-name() = 'title']",
+            org.w3c.dom.Node titleNode = (org.w3c.dom.Node) xPath.evaluate(
+                    "//*[local-name() = 'titleStmt']/*[local-name() = 'title']",
                     doc,
                     XPathConstants.NODE);
             if (titleNode == null) {
@@ -1729,7 +1730,8 @@ public Pair<List<List<Dataset>>, List<BibDataSet>> processTEIDocument(org.w3c.do
         // Annex might contain misclassified relevant sections
         try {
             String expression = "//*[local-name() = 'text']/*[local-name() = 'back']/*[local-name() = 'div'][@*[local-name()='type' and .='annex']]/*[local-name() = 'div']";
-            org.w3c.dom.NodeList bodyNodeList = (org.w3c.dom.NodeList) xPath.evaluate(expression,
+            org.w3c.dom.NodeList bodyNodeList = (org.w3c.dom.NodeList) xPath.evaluate(
+                    expression,
                     doc,
                     XPathConstants.NODESET);
             for (int i = 0; i < bodyNodeList.getLength(); i++) {
@@ -1783,14 +1785,16 @@ public Pair<List<List<Dataset>>, List<BibDataSet>> processTEIDocument(org.w3c.do
         // specific section types statement
         DatastetAnalyzer datastetAnalyzer = DatastetAnalyzer.getInstance();
 
-        List<String> specificSectionTypesAnnex = Arrays.asList("availability", "acknowledgement", "funding");
+        // Acknowledgement and funding sections look like they may be misleading, so only availability section types are listed
+        List<String> specificSectionTypesAnnex = Arrays.asList("availability", "data-availability");
 
         List<DatasetDocumentSequence> availabilitySequences = new ArrayList<>();
         for (String sectionType : specificSectionTypesAnnex) {
             try {
                 String expression = "//*[local-name() = 'text']/*[local-name() = 'back']/*[local-name() = 'div'][@*[local-name()='type' and .='" + sectionType + "']]/*[local-name() = 'div']/*[local-name() = 'p']";
                 expression = extractParagraphs ? expression : expression + "/*[local-name() = 's']";
-                org.w3c.dom.NodeList annexNodeList = (org.w3c.dom.NodeList) xPath.evaluate(expression,
+                org.w3c.dom.NodeList annexNodeList = (org.w3c.dom.NodeList) xPath.evaluate(
+                        expression,
                         doc,
                         XPathConstants.NODESET);
                 for (int i = 0; i < annexNodeList.getLength(); i++) {