Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

syncing with dataquest-dev/dtq-dev #1139

Merged
merged 11 commits into from
Dec 4, 2024
Merged
4 changes: 2 additions & 2 deletions dspace-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,8 @@
<dependencies>
<dependency>
<groupId>org.piwik.java.tracking</groupId>
<artifactId>matomo-java-tracker</artifactId>
<version>2.0</version>
<artifactId>matomo-java-tracker-java11</artifactId>
<version>3.4.0</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
Expand Down
229 changes: 229 additions & 0 deletions dspace-api/src/main/java/org/dspace/administer/FileDownloader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.administer;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.sql.SQLException;
import java.util.List;
import java.util.UUID;
import java.util.stream.Stream;

import org.apache.commons.cli.ParseException;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.Bundle;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.BitstreamFormatService;
import org.dspace.content.service.BitstreamService;
import org.dspace.content.service.ItemService;
import org.dspace.content.service.WorkspaceItemService;
import org.dspace.core.Context;
import org.dspace.eperson.EPerson;
import org.dspace.eperson.factory.EPersonServiceFactory;
import org.dspace.eperson.service.EPersonService;
import org.dspace.identifier.IdentifierNotFoundException;
import org.dspace.identifier.IdentifierNotResolvableException;
import org.dspace.identifier.factory.IdentifierServiceFactory;
import org.dspace.identifier.service.IdentifierService;
import org.dspace.scripts.DSpaceRunnable;
import org.dspace.scripts.configuration.ScriptConfiguration;
import org.dspace.utils.DSpace;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Script that downloads a file from an http(s) URL and stores it as a new bitstream of an existing item.
 *
 * <p>The target item is selected by its UUID ({@code -i}), by a workspace item id ({@code -w}) or by a
 * persistent identifier ({@code -p}). The download is performed on behalf of the eperson returned by
 * {@code getEpersonIdentifier()} or, when that is not set, the eperson with the e-mail given by {@code -e}.
 * The bitstream name is taken from {@code -n}, otherwise from the {@code Content-Disposition} response
 * header, otherwise from the last segment of the URL path.
 */
public class FileDownloader extends DSpaceRunnable<FileDownloaderConfiguration> {

    private static final Logger log = LoggerFactory.getLogger(FileDownloader.class);

    /** Name of the Content-Disposition parameter that carries the suggested file name. */
    private static final String FILENAME_PARAM = "filename=";

    private boolean help = false;
    private UUID itemUUID;
    /** Workspace item id; {@code 0} means "not provided". */
    private int workspaceID;
    private String pid;
    private URI uri;
    private String epersonMail;
    private String bitstreamName;
    private EPersonService epersonService;
    private ItemService itemService;
    private WorkspaceItemService workspaceItemService;
    private IdentifierService identifierService;
    private BitstreamService bitstreamService;
    private BitstreamFormatService bitstreamFormatService;
    private final HttpClient httpClient = HttpClient.newBuilder()
        .followRedirects(HttpClient.Redirect.NORMAL)
        .build();

    /**
     * This method will return the Configuration that the implementing DSpaceRunnable uses
     *
     * @return The {@link ScriptConfiguration} that this implementing DspaceRunnable uses
     */
    @Override
    public FileDownloaderConfiguration getScriptConfiguration() {
        return new DSpace().getServiceManager().getServiceByName("file-downloader",
            FileDownloaderConfiguration.class);
    }

    /**
     * Validates the command line and copies its values into the fields of this script.
     *
     * <p>When {@code -h} is present only the help flag is set and nothing else is validated.
     *
     * @throws ParseException if a mandatory option is missing, the URL is not an absolute http(s) URL,
     *                        or the item UUID / workspace id cannot be parsed
     */
    @Override
    public void setup() throws ParseException {
        log.debug("Setting up {}", FileDownloader.class.getName());
        if (commandLine.hasOption("h")) {
            help = true;
            return;
        }

        if (!commandLine.hasOption("u")) {
            throw new ParseException("No URL option has been provided");
        }

        if (!commandLine.hasOption("i") && !commandLine.hasOption("w") && !commandLine.hasOption("p")) {
            throw new ParseException("No item id option has been provided");
        }

        if (getEpersonIdentifier() == null && !commandLine.hasOption("e")) {
            throw new ParseException("No eperson option has been provided");
        }

        this.epersonService = EPersonServiceFactory.getInstance().getEPersonService();
        this.itemService = ContentServiceFactory.getInstance().getItemService();
        this.workspaceItemService = ContentServiceFactory.getInstance().getWorkspaceItemService();
        this.bitstreamService = ContentServiceFactory.getInstance().getBitstreamService();
        this.bitstreamFormatService = ContentServiceFactory.getInstance().getBitstreamFormatService();
        this.identifierService = IdentifierServiceFactory.getInstance().getIdentifierService();

        try {
            uri = new URI(commandLine.getOptionValue("u"));
        } catch (URISyntaxException e) {
            throw new ParseException("The provided URL is not a valid URL");
        }
        // The JDK HTTP client only accepts http/https URIs with a host; reject anything else here,
        // as an argument error, instead of failing later in the middle of the run.
        String scheme = uri.getScheme();
        boolean httpScheme = "http".equalsIgnoreCase(scheme) || "https".equalsIgnoreCase(scheme);
        if (!httpScheme || uri.getHost() == null) {
            throw new ParseException("The provided URL must be an absolute http or https URL");
        }

        if (commandLine.hasOption("i")) {
            try {
                itemUUID = UUID.fromString(commandLine.getOptionValue("i"));
            } catch (IllegalArgumentException e) {
                throw new ParseException("The provided item UUID is not a valid UUID");
            }
        } else if (commandLine.hasOption("w")) {
            try {
                workspaceID = Integer.parseInt(commandLine.getOptionValue("w"));
            } catch (NumberFormatException e) {
                throw new ParseException("The provided workspace ID is not a valid integer");
            }
        } else if (commandLine.hasOption("p")) {
            pid = commandLine.getOptionValue("p");
        }

        epersonMail = commandLine.getOptionValue("e");

        if (commandLine.hasOption("n")) {
            bitstreamName = commandLine.getOptionValue("n");
        }
    }

    /**
     * Downloads the file and attaches it to the item, then commits the context.
     *
     * <p>The context and the response body stream are closed on every exit path, including failures.
     *
     * @throws IllegalArgumentException if the eperson or the item cannot be found, or the URL answers
     *                                  with a status code of 400 or higher
     * @throws Exception                If something goes wrong
     */
    @Override
    public void internalRun() throws Exception {
        log.debug("Running {}", FileDownloader.class.getName());
        if (help) {
            printHelp();
            return;
        }

        try (Context context = new Context()) {
            EPerson eperson = getEperson(context);
            if (eperson == null) {
                throw new IllegalArgumentException("No eperson found for the given identifier or email");
            }
            context.setCurrentUser(eperson);

            //find the item by the given id
            Item item = findItem(context);
            if (item == null) {
                throw new IllegalArgumentException("No item found for the given ID");
            }

            HttpRequest request = HttpRequest.newBuilder()
                .uri(uri)
                .build();

            HttpResponse<InputStream> response =
                httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream());

            // The body stream is opened as soon as the response arrives, so it has to be closed
            // even when the status code makes us give up.
            try (InputStream is = response.body()) {
                if (response.statusCode() >= 400) {
                    throw new IllegalArgumentException(
                        "The provided URL returned a status code of " + response.statusCode());
                }

                //use the provided value, the content-disposition header, the last part of the uri
                if (bitstreamName == null) {
                    bitstreamName = resolveBitstreamName(response);
                }

                saveFileToItem(context, item, is, bitstreamName);
            }

            context.commit();
        }
    }

    /**
     * Derives a bitstream name from the response: the {@code filename} parameter of the
     * {@code Content-Disposition} header if there is one, otherwise the last segment of the URL path.
     *
     * @param response the HTTP response of the download
     * @return the derived name; may be empty when the URL path has no last segment
     */
    private String resolveBitstreamName(HttpResponse<InputStream> response) {
        String fromHeader = response.headers().firstValue("Content-Disposition")
            .map(FileDownloader::extractFilename)
            .orElse(null);
        if (fromHeader != null) {
            return fromHeader;
        }
        String path = uri.getPath();
        if (path == null) {
            return "";
        }
        return path.substring(path.lastIndexOf('/') + 1);
    }

    /**
     * Extracts the value of the {@code filename} parameter from a {@code Content-Disposition} header value.
     * Parameters are separated by {@code ;}; surrounding whitespace and double quotes are removed and the
     * parameter name is matched case-insensitively.
     *
     * @param contentDisposition the raw header value
     * @return the file name, or {@code null} when the header carries no non-empty {@code filename} parameter
     */
    private static String extractFilename(String contentDisposition) {
        return Stream.of(contentDisposition.split(";"))
            .map(String::trim)
            .filter(part -> part.regionMatches(true, 0, FILENAME_PARAM, 0, FILENAME_PARAM.length()))
            .map(part -> part.substring(FILENAME_PARAM.length()).replace("\"", "").trim())
            .filter(name -> !name.isEmpty())
            .findFirst()
            .orElse(null);
    }

    /**
     * Looks up the target item using, in this order, the item UUID, the workspace item id or the
     * persistent identifier.
     *
     * @param context the DSpace context
     * @return the item, or {@code null} when the lookup yields nothing
     * @throws SQLException             if the lookup fails
     * @throws IllegalArgumentException if the persistent identifier cannot be resolved or does not
     *                                  resolve to an item
     */
    private Item findItem(Context context) throws SQLException {
        if (itemUUID != null) {
            return itemService.find(context, itemUUID);
        } else if (workspaceID != 0) {
            // An unknown workspace id must end up as "no item found", not as a NullPointerException.
            var workspaceItem = workspaceItemService.find(context, workspaceID);
            return workspaceItem == null ? null : workspaceItem.getItem();
        } else if (pid == null) {
            return null;
        } else {
            try {
                DSpaceObject dso = identifierService.resolve(context, pid);
                if (dso instanceof Item) {
                    return (Item) dso;
                } else {
                    throw new IllegalArgumentException("The provided identifier does not resolve to an item");
                }
            } catch (IdentifierNotFoundException | IdentifierNotResolvableException e) {
                throw new IllegalArgumentException(e);
            }
        }
    }

    /**
     * Creates a bitstream from the stream, names it and assigns the format guessed by the
     * {@link BitstreamFormatService}. The bitstream is added to the first "ORIGINAL" bundle of the
     * item; when the item has none, {@code ItemService#createSingleBitstream} is used instead.
     *
     * @param context the DSpace context
     * @param item    the item that receives the bitstream
     * @param is      the content of the bitstream
     * @param name    the name of the bitstream
     */
    private void saveFileToItem(Context context, Item item, InputStream is, String name)
        throws SQLException, AuthorizeException, IOException {
        log.debug("Saving file to item {}", item.getID());
        List<Bundle> originals = item.getBundles("ORIGINAL");
        Bitstream b;
        if (originals.isEmpty()) {
            b = itemService.createSingleBitstream(context, is, item);
        } else {
            Bundle bundle = originals.get(0);
            b = bitstreamService.create(context, bundle, is);
        }
        b.setName(context, name);
        //now guess format of the bitstream
        BitstreamFormat bf = bitstreamFormatService.guessFormat(context, b);
        b.setFormat(context, bf);
    }

    /**
     * Finds the eperson on whose behalf the script runs: by {@code getEpersonIdentifier()} when it is
     * set, otherwise by the e-mail given on the command line.
     *
     * @param context the DSpace context
     * @return the result of the eperson lookup; the caller treats {@code null} as "not found"
     * @throws SQLException if the lookup fails
     */
    private EPerson getEperson(Context context) throws SQLException {
        if (getEpersonIdentifier() != null) {
            return epersonService.find(context, getEpersonIdentifier());
        } else {
            return epersonService.findByEmail(context, epersonMail);
        }
    }
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.administer;

import org.apache.commons.cli.OptionGroup;
import org.apache.commons.cli.Options;
import org.dspace.scripts.configuration.ScriptConfiguration;

/**
 * Script configuration of the {@link FileDownloader} script: holds the runnable class and defines the
 * command line options the script understands.
 */
public class FileDownloaderConfiguration extends ScriptConfiguration<FileDownloader> {

    private Class<FileDownloader> dspaceRunnableClass;

    /**
     * Returns the runnable class configured for this script.
     *
     * @return the dspaceRunnableClass value of this ScriptConfiguration
     */
    @Override
    public Class<FileDownloader> getDspaceRunnableClass() {
        return dspaceRunnableClass;
    }

    /**
     * Sets the runnable class of this script.
     *
     * @param dspaceRunnableClass The dspaceRunnableClass to be set on this FileDownloaderConfiguration
     */
    @Override
    public void setDspaceRunnableClass(Class<FileDownloader> dspaceRunnableClass) {
        this.dspaceRunnableClass = dspaceRunnableClass;
    }

    /**
     * Returns the command line options of the script, building and caching them on the first call.
     *
     * @return the options value of this ScriptConfiguration
     */
    @Override
    public Options getOptions() {
        // Already built by an earlier call: hand out the cached instance.
        if (options != null) {
            return options;
        }

        Options cliOptions = new Options();

        cliOptions.addOption("h", "help", false, "help");

        cliOptions.addOption("u", "url", true, "source url");
        cliOptions.getOption("u").setRequired(true);

        // The three alternative ways of selecting the target item.
        cliOptions.addOption("i", "uuid", true, "item uuid");
        cliOptions.addOption("w", "wsid", true, "workspace id");
        cliOptions.addOption("p", "pid", true, "item pid (e.g. handle or doi)");

        // NOTE(review): this group is filled and marked required but never passed to
        // Options#addOptionGroup in this method, so the parser presumably does not enforce it;
        // confirm before relying on it.
        OptionGroup itemSelectors = new OptionGroup();
        itemSelectors.addOption(cliOptions.getOption("i"));
        itemSelectors.addOption(cliOptions.getOption("w"));
        itemSelectors.addOption(cliOptions.getOption("p"));
        itemSelectors.setRequired(true);

        cliOptions.addOption("e", "eperson", true, "eperson email");
        cliOptions.getOption("e").setRequired(false);

        cliOptions.addOption("n", "name", true, "name of the file/bitstream");
        cliOptions.getOption("n").setRequired(false);

        super.options = cliOptions;
        return options;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import java.sql.SQLException;
import java.text.MessageFormat;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Objects;
import javax.servlet.http.HttpServletRequest;
Expand Down Expand Up @@ -69,7 +70,6 @@ public ClarinMatomoBitstreamTracker() {
@Override
protected void preTrack(Context context, MatomoRequest matomoRequest, Item item, HttpServletRequest request) {
super.preTrack(context, matomoRequest, item, request);

matomoRequest.setSiteId(siteId);
log.debug("Logging to site " + matomoRequest.getSiteId());
String itemIdentifier = getItemIdentifier(item);
Expand All @@ -82,6 +82,11 @@ protected void preTrack(Context context, MatomoRequest matomoRequest, Item item,
}
try {
matomoRequest.setPageCustomVariable(new CustomVariable("source", "bitstream"), 1);
// Add the Item handle into the request as a custom dimension
LinkedHashMap<Long, Object> handleDimension = new LinkedHashMap<>();
handleDimension.put(configurationService.getLongProperty("matomo.custom.dimension.handle.id",
1L), item.getHandle());
matomoRequest.setDimensions(handleDimension);
} catch (MatomoException e) {
log.error(e);
}
Expand Down
Loading
Loading