Skip to content

Commit

Permalink
Make sure OLE2ScratchpadExtractorFactory is sorted first
Browse files Browse the repository at this point in the history
Otherwise order of found extractors would depend on jar-loading order
and thus might have unexpected side-effects and missing features in
text-extraction.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1914407 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
centic9 committed Dec 6, 2023
1 parent 497482d commit c8c8130
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
15 changes: 15 additions & 0 deletions poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,21 @@ private interface ProviderMethod {
private ExtractorFactory() {
ClassLoader cl = ExtractorFactory.class.getClassLoader();
ServiceLoader.load(ExtractorProvider.class, cl).forEach(provider::add);

// loading of service-files is non-deterministic as it depends on order of loaded jars
// however we would like to "prefer" one Factory, so let's make sure the more
// powerful "ScratchpadProvider" is sorted first
provider.sort((o1, o2) -> {
if (o1.getClass() != o2.getClass()) {
if (o1.getClass().getSimpleName().equals("OLE2ScratchpadExtractorFactory")) {
return -1;
} else if (o2.getClass().getSimpleName().equals("OLE2ScratchpadExtractorFactory")) {
return 1;
}
}

return o1.getClass().getName().compareTo(o2.getClass().getName());
});
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ public interface ExtractorProvider {
* @param dirs a list to be filled with directory references holding embedded resources
* @param nonPOIFS a list to be filled with streams which aren't based on POIFS entries
*
* @throws IOException when the format specific extraction fails because of invalid entires
* @throws IOException when the format specific extraction fails because of invalid entries
* @throws java.lang.IllegalArgumentException if implementations do not override this method
*/
default void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
throw new IllegalArgumentException("Error checking for Scratchpad embedded resources");
Expand Down

0 comments on commit c8c8130

Please sign in to comment.