From 8431d7f9e95ad38f854e81a62b0ada45777897de Mon Sep 17 00:00:00 2001
From: Peter Kiraly
Date: Wed, 18 Sep 2019 17:14:41 +0200
Subject: [PATCH 001/608] Fix at 'Show the dataset whose id is passed' section
 #6083

---
 doc/sphinx-guides/source/api/native-api.rst | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index 0cb2dcead64..9b39cc0ca20 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -413,10 +413,15 @@ Get JSON Representation of a Dataset
 
   curl http://localhost:8080/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/J8SJZB
 
-|CORS| Show the dataset whose id is passed::
 
-  GET http://$SERVER/api/datasets/$id?key=$apiKey
+  curl http://$SERVER/api/datasets/$id?key=$apiKey
+
+fully expanded::
+
+  curl http://localhost:8080/api/datasets/xxxx?key=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+
+The (numeric) id isn't shown in the user interface (which uses the DOI for identification). You can get it from the JSON representation of the dataset.
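+
+One way to pull the numeric id out of that JSON representation, for example with the ``jq`` tool::
+
+  curl "http://localhost:8080/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB" | jq .data.id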

From 07b34b0b2edcbaec727310ff3c49e09430a8c06a Mon Sep 17 00:00:00 2001
From: Robert Treacy
Date: Thu, 29 Jul 2021 17:35:26 -0400
Subject: [PATCH 002/608] initial commit implements POST-redirect-GET for DP
 Creator tool

The POST is currently done on the server, which gets a redirect response, and
the browser then GETs the new location. Need to change the way the base
context is obtained for the POST: as in the GET code, it always uses the
external tool URL as provided in the configuration, but the redirect may use
a different context than the configured tool URL.
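
For reference, the tool now receives an application/x-www-form-urlencoded
POST body built from the same reserved words used for GET query parameters.
A sketch of what such a body looks like (the parameter names come from the
tool's manifest; the values here are made up):

  fileid=42&siteUrl=http%3A%2F%2Flocalhost%3A8080&key=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx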
---
 .../externaltools/ExternalToolHandler.java | 153 +++++++++++++++++-
 1 file changed, 152 insertions(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
index a4a51666cc5..ff616d08a4f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
+++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
@@ -8,14 +8,26 @@
 import edu.harvard.iq.dataverse.authorization.users.ApiToken;
 import edu.harvard.iq.dataverse.externaltools.ExternalTool.ReservedWord;
 import edu.harvard.iq.dataverse.util.SystemConfig;
+import java.io.IOException;
 import java.io.StringReader;
+import java.net.HttpURLConnection;
+import java.net.URI;
+import java.net.URLEncoder;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
+import java.util.logging.Level;
 import java.util.logging.Logger;
 import javax.json.Json;
 import javax.json.JsonArray;
 import javax.json.JsonObject;
 import javax.json.JsonReader;
+import javax.ws.rs.HttpMethod;
 
 /**
  * Handles an operation on a specific file. Requires a file id in order to be
@@ -33,6 +45,8 @@ public class ExternalToolHandler {
     private ApiToken apiToken;
     private String localeCode;
 
+    private String requestMethod;
+    private String toolContext;
 
     /**
      * File level tool
@@ -44,6 +58,7 @@ public class ExternalToolHandler {
      */
     public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToken apiToken, FileMetadata fileMetadata, String localeCode) {
         this.externalTool = externalTool;
+        toolContext = externalTool.getToolUrl();
         if (dataFile == null) {
             String error = "A DataFile is required.";
             logger.warning("Error in ExternalToolHandler constructor: " + error);
@@ -106,6 +121,16 @@ public String getQueryParametersForUrl() {
 
     // TODO: rename to handleRequest() to someday handle sending headers as well as query parameters.
     public String getQueryParametersForUrl(boolean preview) {
+        requestMethod = requestMethod();
+        if (requestMethod.equals(HttpMethod.POST)) {
+            try {
+                return getFormData();
+            } catch (IOException ex) {
+                Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex);
+            } catch (InterruptedException ex) {
+                Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex);
+            }
+        }
         String toolParameters = externalTool.getToolParameters();
         JsonReader jsonReader = Json.createReader(new StringReader(toolParameters));
         JsonObject obj = jsonReader.readObject();
@@ -183,9 +208,135 @@ private String getQueryParam(String key, String value) {
         }
         return null;
     }
+
+    private String getFormDataValue(String key, String value) {
+        ReservedWord reservedWord = ReservedWord.fromString(value);
+        switch (reservedWord) {
+        case FILE_ID:
+            // getDataFile is never null for file tools because of the constructor
+            return "" + getDataFile().getId();
+        case FILE_PID:
+            GlobalId filePid = getDataFile().getGlobalId();
+            if (filePid != null) {
+                return "" + getDataFile().getGlobalId();
+            }
+            break;
+        case SITE_URL:
+            return "" + SystemConfig.getDataverseSiteUrlStatic();
+        case API_TOKEN:
+            String apiTokenString = null;
+            ApiToken theApiToken = getApiToken();
+            if (theApiToken != null) {
+                apiTokenString = theApiToken.getTokenString();
+                return "" + apiTokenString;
+            }
+            break;
+        case DATASET_ID:
+            return "" + dataset.getId();
+        case DATASET_PID:
+            return "" + dataset.getGlobalId().asString();
+        case DATASET_VERSION:
+            String versionString = null;
+            if (fileMetadata != null) { // true for file case
+                versionString = fileMetadata.getDatasetVersion().getFriendlyVersionNumber();
+            } else { // Dataset case - return the latest visible version (unless/until the dataset case allows specifying a version)
+                if (getApiToken() != null) {
+                    versionString = dataset.getLatestVersion().getFriendlyVersionNumber();
+                } else {
+                    versionString = dataset.getLatestVersionForCopy().getFriendlyVersionNumber();
+                }
+            }
+            if (("DRAFT").equals(versionString)) {
+                versionString = ":draft"; // send the token that can be substituted for a numeric
+                                          // version in api calls
+            }
+            return "" + versionString;
+        case FILE_METADATA_ID:
+            if (fileMetadata != null) { // true for file case
+                return "" + fileMetadata.getId();
+            }
+            break;
+        case LOCALE_CODE:
+            return "" + getLocaleCode();
+        default:
+            break;
+        }
+        return null;
+    }
+
+    private String getFormData() throws IOException, InterruptedException {
+        String url = "";
+        String toolParameters = externalTool.getToolParameters();
+        JsonReader jsonReader = Json.createReader(new StringReader(toolParameters));
+        JsonObject obj = jsonReader.readObject();
+        JsonArray queryParams = obj.getJsonArray("queryParameters");
+        if (queryParams == null || queryParams.isEmpty()) {
+            return "";
+        }
+        Map<Object, Object> data = new HashMap<>();
+        queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> {
+            queryParam.keySet().forEach((key) -> {
+                String value = queryParam.getString(key);
+                String param = getFormDataValue(key, value);
+                if (param != null && !param.isEmpty()) {
+                    data.put(key, param);
+                }
+            });
+        });
+        HttpClient client = HttpClient.newHttpClient();
+        HttpRequest request = HttpRequest.newBuilder().POST(ofFormData(data)).uri(URI.create(externalTool.getToolUrl()))
+                .header("Content-Type", "application/x-www-form-urlencoded")
+                .build();
+
+        HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
+        boolean redirect = false;
+        int status = response.statusCode();
+        if (status != HttpURLConnection.HTTP_OK) {
+            if (status == HttpURLConnection.HTTP_MOVED_TEMP
+                    || status == HttpURLConnection.HTTP_MOVED_PERM
+                    || status == HttpURLConnection.HTTP_SEE_OTHER) {
+                redirect = true;
+            }
+        }
+        if (redirect) {
+            String newUrl = response.headers().firstValue("location").get();
+            logger.fine("POST redirected to: " + newUrl);
+            toolContext = "http://" + response.uri().getAuthority();
+
+            url = newUrl;
+        }
+
+        logger.fine("POST response status: " + response.statusCode());
+        logger.fine(response.body());
+
+        return url;
+    }
+
+    public static HttpRequest.BodyPublisher ofFormData(Map<Object, Object> data) {
+        var builder = new StringBuilder();
+        for (Map.Entry<Object, Object> entry : data.entrySet()) {
+            if (builder.length() > 0) {
+                builder.append("&");
+            }
+            builder.append(URLEncoder.encode(entry.getKey().toString(), StandardCharsets.UTF_8));
+            builder.append("=");
+            builder.append(URLEncoder.encode(entry.getValue().toString(), StandardCharsets.UTF_8));
+        }
+        return HttpRequest.BodyPublishers.ofString(builder.toString());
+    }
+
+    // placeholder for a way to use the POST method instead of the GET method
+    public String requestMethod() {
+        if (externalTool.getDisplayName().startsWith("DP")) {
+            return HttpMethod.POST;
+        }
+        return HttpMethod.GET;
+    }
 
     public String getToolUrlWithQueryParams() {
-        return externalTool.getToolUrl() + getQueryParametersForUrl();
+        String params = getQueryParametersForUrl();
+        return toolContext + params;
     }
 
     public String getToolUrlForPreviewMode() {

From f74e0c2c855d7c5de2dc5233bcb4d5a34c159629 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 9 Sep 2021 17:47:56 -0400
Subject: [PATCH 003/608] add bounding box indexing
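
The box is stored in the Solr spatial field as an ENVELOPE(minX, maxX, maxY,
minY) value, i.e. ENVELOPE(westLon, eastLon, northLat, southLat). A sketch of
a matching filter query against the new field (standard Solr RPT syntax; how
search will actually use the field is not part of this commit):

  fq={!field f=solr_srpt}Intersects(ENVELOPE(-10.0, 10.0, 20.0, -20.0))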
---
 conf/solr/8.8.1/schema.xml                    |  9 ++++
 .../iq/dataverse/search/IndexServiceBean.java | 42 +++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/conf/solr/8.8.1/schema.xml b/conf/solr/8.8.1/schema.xml
index c6f6cd37cd6..622c4661f6c 100644
--- a/conf/solr/8.8.1/schema.xml
+++ b/conf/solr/8.8.1/schema.xml
@@ -450,6 +450,9 @@
+    <!-- spatial field for geographic bounding boxes -->
+    <field name="solr_srpt" type="location_rpt" multiValued="true" stored="true" indexed="true"/>
+
@@ -909,6 +915,9 @@
 -->
+    <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
+               geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers"/>
+

diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
index d72e2a7f642..b718a63ed95 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -5,6 +5,7 @@
 import edu.harvard.iq.dataverse.DataFileTag;
 import edu.harvard.iq.dataverse.Dataset;
 import edu.harvard.iq.dataverse.DatasetField;
+import edu.harvard.iq.dataverse.DatasetFieldCompoundValue;
 import edu.harvard.iq.dataverse.DatasetFieldConstant;
 import edu.harvard.iq.dataverse.DatasetFieldType;
 import edu.harvard.iq.dataverse.DatasetLinkingServiceBean;
@@ -883,6 +884,47 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
                     }
                 }
             }
+
+            // ToDo - define a geom/bbox type solr field and find those instead of just this one
+            if (dsfType.getName().equals(DatasetFieldConstant.geographicBoundingBox)) {
+                for (DatasetFieldCompoundValue compoundValue : dsf.getDatasetFieldCompoundValues()) {
+                    String westLon = null;
+                    String eastLon = null;
+                    String northLat = null;
+                    String southLat = null;
+                    for (DatasetField childDsf : compoundValue.getChildDatasetFields()) {
+                        switch (childDsf.getDatasetFieldType().getName()) {
+                        case DatasetFieldConstant.westLongitude:
+                            westLon = childDsf.getRawValue();
+                            break;
+                        case DatasetFieldConstant.eastLongitude:
+                            eastLon = childDsf.getRawValue();
+                            break;
+                        case DatasetFieldConstant.northLatitude:
+                            northLat = childDsf.getRawValue();
+                            break;
+                        case DatasetFieldConstant.southLatitude:
+                            southLat = childDsf.getRawValue();
+                            break;
+                        }
+                    }
+                    if ((eastLon != null || westLon != null) && (northLat != null || southLat != null)) {
+                        // we have a point or a box, so proceed
+                        if (eastLon == null) {
+                            eastLon = westLon;
+                        } else if (westLon == null) {
+                            westLon = eastLon;
+                        }
+                        if (northLat == null) {
+                            northLat = southLat;
+                        } else if (southLat == null) {
+                            southLat = northLat;
+                        }
+                        // W, E, N, S
+                        solrInputDocument.addField("solr_srpt", "ENVELOPE(" + westLon + "," + eastLon + "," + northLat + "," + southLat + ")");
+                    }
+                }
+            }
         }
     }

From d78f20d7795846626c67ff27c81c7b4a57677727 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 3 Mar 2022 08:50:18 -0500
Subject: [PATCH 004/608] same utility function from 3B work

---
 .../java/edu/harvard/iq/dataverse/util/json/JsonUtil.java | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java
index ae6935945e8..f4a3c635f8b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java
@@ -3,6 +3,8 @@
 import com.google.gson.Gson;
 import com.google.gson.GsonBuilder;
 import com.google.gson.JsonObject;
+
+import java.io.StringReader;
 import java.io.StringWriter;
 import java.util.HashMap;
 import java.util.Map;
@@ -56,4 +58,9 @@ public static String prettyPrint(javax.json.JsonObject jsonObject) {
         return stringWriter.toString();
     }
 
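+    /**
+     * Parse a string of serialized JSON into a javax.json JsonObject. Note that
+     * readObject() will throw a javax.json.JsonException if the string is not
+     * valid JSON, so callers should be prepared to catch it.
+     */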
+    public static javax.json.JsonObject getJsonObject(String serializedJson) {
+        try (StringReader rdr = new StringReader(serializedJson)) {
+            return Json.createReader(rdr).readObject();
+        }
+    }
 }

From e7636b64bb9dd41dd08d2bcc687c2dce01797875 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 3 Mar 2022 08:50:48 -0500
Subject: [PATCH 005/608] refactor common code to abstract base re: datacite
 xml

---
 .../command/impl/AbstractSubmitToArchiveCommand.java  | 10 ++++++++++
 .../command/impl/DuraCloudSubmitToArchiveCommand.java |  5 +----
 .../impl/GoogleCloudSubmitToArchiveCommand.java       |  5 +----
 .../command/impl/LocalSubmitToArchiveCommand.java     |  5 +----
 4 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
index 77ea680598f..a235dd57d91 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
@@ -1,5 +1,7 @@
 package edu.harvard.iq.dataverse.engine.command.impl;
 
+import edu.harvard.iq.dataverse.DOIDataCiteRegisterService;
+import edu.harvard.iq.dataverse.DataCitation;
 import edu.harvard.iq.dataverse.DatasetVersion;
 import edu.harvard.iq.dataverse.DvObject;
 import edu.harvard.iq.dataverse.authorization.Permission;
@@ -13,6 +15,7 @@
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult;
 
+import java.nio.charset.Charset;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.Map;
@@ -72,5 +75,12 @@ public String describe() {
         return super.describe() + "DatasetVersion: [" + version.getId() + " (v" + version.getFriendlyVersionNumber() + ")]";
     }
+
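+    /**
+     * Builds the DataCite XML for the given version's dataset. Shared by the
+     * DuraCloud, Google Cloud, and local archivers below so the metadata
+     * generation lives in one place.
+     */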
+    String getDataCiteXml(DatasetVersion dv) {
+        DataCitation dc = new DataCitation(dv);
+        Map<String, String> metadata = dc.getDataCiteMetadata();
+        return DOIDataCiteRegisterService.getMetadataFromDvObject(dv.getDataset().getGlobalId().asString(), metadata,
+                dv.getDataset());
+    }
 }

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java
index 468e99f24c1..e595940d2ff 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java
@@ -76,10 +76,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t
                 store = storeManager.getPrimaryContentStore();
                 // Create space to copy archival files to
                 store.createSpace(spaceName);
-                DataCitation dc = new DataCitation(dv);
-                Map<String, String> metadata = dc.getDataCiteMetadata();
-                String dataciteXml = DOIDataCiteRegisterService.getMetadataFromDvObject(
-                        dv.getDataset().getGlobalId().asString(), metadata, dv.getDataset());
+                String dataciteXml = getDataCiteXml(dv);
 
                 MessageDigest messageDigest = MessageDigest.getInstance("MD5");
                 try (PipedInputStream dataciteIn = new PipedInputStream(); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) {

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java
index cb729a9807a..04d16784876 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java
@@ -69,10 +69,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t
                 String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-')
                         .replace('.', '-').toLowerCase();
 
-                DataCitation dc = new DataCitation(dv);
-                Map<String, String> metadata = dc.getDataCiteMetadata();
-                String dataciteXml = DOIDataCiteRegisterService.getMetadataFromDvObject(
-                        dv.getDataset().getGlobalId().asString(), metadata, dv.getDataset());
+                String dataciteXml = getDataCiteXml(dv);
                 String blobIdString = null;
                 MessageDigest messageDigest = MessageDigest.getInstance("MD5");
                 try (PipedInputStream dataciteIn = new PipedInputStream(); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) {

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java
index d87c3011c15..1f838efdd8e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java
@@ -62,10 +62,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t
                 String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-')
                         .replace('.', '-').toLowerCase();
 
-                DataCitation dc = new DataCitation(dv);
-                Map<String, String> metadata = dc.getDataCiteMetadata();
-                String dataciteXml = DOIDataCiteRegisterService.getMetadataFromDvObject(
-                        dv.getDataset().getGlobalId().asString(), metadata, dv.getDataset());
+                String dataciteXml = getDataCiteXml(dv);
 
                 FileUtils.writeStringToFile(new File(localPath + "/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml"), dataciteXml);

From 07910eca3d84fe9335ff6cf6c940ac4c6bb260be Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 3 Mar 2022 12:26:48 -0500
Subject: [PATCH 006/608] S3 archiver
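
The archiver is configured by two settings read in performArchiveSubmission()
below; a sketch of plausible values (the key names are the ones the code
reads, while the endpoint/bucket/profile values are made up):

  curl -X PUT -d '{"bucket-name":"dataverse-archive","custom-endpoint-url":"https://s3.example.edu","path-style-access":true}' \
       http://localhost:8080/api/admin/settings/:S3ArchivalConfig
  curl -X PUT -d 'archiver' http://localhost:8080/api/admin/settings/:S3ArchivalProfile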
---
 .../impl/S3SubmitToArchiveCommand.java | 235 ++++++++++++++++++
 1 file changed, 235 insertions(+)
 create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java
new file mode 100644
index 00000000000..07ef5ebb475
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java
@@ -0,0 +1,235 @@
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.DatasetLock.Reason;
+import edu.harvard.iq.dataverse.authorization.Permission;
+import edu.harvard.iq.dataverse.authorization.users.ApiToken;
+import edu.harvard.iq.dataverse.engine.command.Command;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
+import edu.harvard.iq.dataverse.util.bagit.BagGenerator;
+import edu.harvard.iq.dataverse.util.bagit.OREMap;
+import edu.harvard.iq.dataverse.util.json.JsonUtil;
+import edu.harvard.iq.dataverse.workflow.step.Failure;
+import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.security.DigestInputStream;
+import java.security.MessageDigest;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import javax.json.JsonObject;
+
+import org.apache.commons.codec.binary.Hex;
+import org.eclipse.microprofile.config.Config;
+import org.eclipse.microprofile.config.ConfigProvider;
+
+import com.amazonaws.ClientConfiguration;
+import com.amazonaws.auth.AWSCredentialsProviderChain;
+import com.amazonaws.auth.AWSStaticCredentialsProvider;
+import com.amazonaws.auth.BasicAWSCredentials;
+import com.amazonaws.auth.profile.ProfileCredentialsProvider;
+import com.amazonaws.client.builder.AwsClientBuilder;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.s3.model.ObjectMetadata;
+import com.amazonaws.services.s3.model.PutObjectRequest;
+import com.amazonaws.services.s3.transfer.TransferManager;
+import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
+
+@RequiredPermissions(Permission.PublishDataset)
+public class S3SubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command<DatasetVersion> {
+
+    private static final Logger logger = Logger.getLogger(S3SubmitToArchiveCommand.class.getName());
+    private static final String S3_CONFIG = ":S3ArchivalConfig";
+    private static final String S3_PROFILE = ":S3ArchivalProfile";
+
+    private static final Config config = ConfigProvider.getConfig();
+    private AmazonS3 s3 = null;
+    private TransferManager tm = null;
+
+    public S3SubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) {
+        super(aRequest, version);
+    }
+
+    @Override
+    public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map<String, String> requestedSettings) {
+        logger.fine("In S3SubmitToArchiveCommand...");
+        JsonObject configObject = null;
+        String profileName = requestedSettings.get(S3_PROFILE);
+        String bucketName = null;
+        logger.fine("Profile: " + profileName + " Config: " + configObject);
+        try {
+            configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG));
+            bucketName = configObject.getString("bucket-name", null);
+        } catch (Exception e) {
+            logger.warning("Unable to parse " + S3_CONFIG + " setting as a Json object");
+        }
+        if (configObject != null && profileName != null && bucketName != null) {
+
+            s3 = createClient(configObject, profileName);
+            tm = TransferManagerBuilder.standard()
+                    .withS3Client(s3)
+                    .build();
+            try {
+
+                Dataset dataset = dv.getDataset();
+                if (dataset.getLockFor(Reason.finalizePublication) == null) {
+
+                    String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-')
+                            .replace('.', '-').toLowerCase();
+                    String dataciteXml = getDataCiteXml(dv);
+                    MessageDigest messageDigest = MessageDigest.getInstance("MD5");
+                    try (ByteArrayInputStream dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8")); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) {
+                        // Add datacite.xml file
+                        ObjectMetadata om = new ObjectMetadata();
+                        om.setContentLength(dataciteIn.available());
+                        String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber() + ".xml";
+                        tm.upload(new PutObjectRequest(bucketName, dcKey, digestInputStream, om)).waitForCompletion();
+                        String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest());
+                        om = s3.getObjectMetadata(bucketName, dcKey);
+                        if (!om.getContentMD5().equals(localchecksum)) {
+                            logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + dcKey);
+                            return new Failure("Error in transferring DataCite.xml file to S3",
+                                    "S3 Submission Failure: incomplete metadata transfer");
+                        }
+
+                        // Store BagIt file
+                        String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip";
+                        String bagKey = spaceName + "/" + fileName;
+                        // Add BagIt ZIP file
+                        // MD5 is used as one way to verify the transfer
+                        messageDigest = MessageDigest.getInstance("MD5");
+                        // Generate bag
+                        BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml);
+                        bagger.setAuthenticationKey(token.getTokenString());
+                        if (bagger.generateBag(fileName, false)) {
+                            File bagFile = bagger.getBagFile(fileName);
+
+                            try (FileInputStream in = new FileInputStream(bagFile); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest);) {
+                                om = new ObjectMetadata();
+                                om.setContentLength(bagFile.length());
+
+                                tm.upload(new PutObjectRequest(bucketName, bagKey, digestInputStream2, om)).waitForCompletion();
+                                localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest());
+                                om = s3.getObjectMetadata(bucketName, bagKey);
+
+                                if (!om.getContentMD5().equals(localchecksum)) {
+                                    logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + fileName);
+                                    return new Failure("Error in transferring Bag file to S3",
+                                            "S3 Submission Failure: incomplete transfer");
+                                }
+                            } catch (RuntimeException rte) {
+                                logger.severe("Error creating Bag during S3 archiving: " + rte.getMessage());
+                                return new Failure("Error in generating Bag",
+                                        "S3 Submission Failure: archive file not created");
+                            }
+
+                            logger.fine("S3 Submission step: Content Transferred");
+
+                            // Document the location of the dataset archival copy (actually the URL
+                            // where you can view it as an admin)
+
+                            // Unsigned URL - gives location but not access without creds
+                            dv.setArchivalCopyLocation(s3.getUrl(bucketName, bagKey).toString());
+                        } else {
+                            logger.warning("Could not write local Bag file " + fileName);
+                            return new Failure("S3 Archiver fail writing temp local bag");
+                        }
+
+                    }
+                } else {
+                    logger.warning("S3 Archiver Submission Workflow aborted: Dataset locked for publication/pidRegister");
+                    return new Failure("Dataset locked");
+                }
+            } catch (Exception e) {
+                logger.warning(e.getLocalizedMessage());
+                e.printStackTrace();
+                return new Failure("S3 Archiver Submission Failure",
+                        e.getLocalizedMessage() + ": check log for details");
+
+            }
+            return WorkflowStepResult.OK;
+        } else {
+            return new Failure("S3 Submission not configured - no \":S3ArchivalConfig\" and/or \":S3ArchivalProfile\".");
+        }
+    }
+
+    private AmazonS3 createClient(JsonObject configObject, String profileName) {
+        // get a standard client, using the standard way of configuration the credentials, etc.
+        AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard();
+
+        ClientConfiguration cc = new ClientConfiguration();
+        Integer poolSize = configObject.getInt("connection-pool-size", 256);
+        cc.setMaxConnections(poolSize);
+        s3CB.setClientConfiguration(cc);
+
+        /**
+         * Pass in a URL pointing to your S3 compatible storage.
+         * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html
+         */
+        String s3CEUrl = configObject.getString("custom-endpoint-url", "");
+        /**
+         * Pass in a region to use for SigV4 signing of requests.
+         * Defaults to "dataverse" as it is not relevant for custom S3 implementations.
+         */
+        String s3CERegion = configObject.getString("custom-endpoint-region", "dataverse");
+
+        // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones.
+        if (!s3CEUrl.isEmpty()) {
+            s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion));
+        }
+        /**
+         * Pass in a boolean value if path style access should be used within the S3 client.
+         * Anything but case-insensitive "true" will lead to value of false, which is default value, too.
+         */
+        Boolean s3pathStyleAccess = configObject.getBoolean("path-style-access", false);
+        // some custom S3 implementations require "PathStyleAccess" as they use a path, not a subdomain. default = false
+        s3CB.withPathStyleAccessEnabled(s3pathStyleAccess);
+
+        /**
+         * Pass in a boolean value if payload signing should be used within the S3 client.
+         * Anything but case-insensitive "true" will lead to value of false, which is default value, too.
+         */
+        Boolean s3payloadSigning = configObject.getBoolean("payload-signing", false);
+        /**
+         * Pass in a boolean value if chunked encoding should not be used within the S3 client.
+         * Anything but case-insensitive "false" will lead to value of true, which is default value, too.
+         */
+        Boolean s3chunkedEncoding = configObject.getBoolean("chunked-encoding", true);
+        // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false
+        s3CB.setPayloadSigningEnabled(s3payloadSigning);
+        // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true
+        // Boolean is inverted, otherwise setting dataverse.files.<id>.chunked-encoding=false would result in leaving Chunked Encoding enabled
+        s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding);
+
+        /**
+         * Pass in a string value if this storage driver should use a non-default AWS S3 profile.
+         * The default is "default" which should work when only one profile exists.
+         */
+        ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(profileName);
+
+        // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env
+        // vars or system properties to provide these, but use the secrets config source provided by Payara.
+        AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider(
+                new BasicAWSCredentials(
+                        config.getOptionalValue("dataverse.s3archiver.access-key", String.class).orElse(""),
+                        config.getOptionalValue("dataverse.s3archiver.secret-key", String.class).orElse("")));
+
+        // Add both providers to chain - the first working provider will be used (so static credentials are the fallback)
+        AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(profileCredentials, staticCredentials);
+        s3CB.setCredentials(providerChain);
+
+        // let's build the client :-)
+        AmazonS3 client = s3CB.build();
+        return client;
+    }
+
+}

From 70241596800b24600812020b8728186218d987bd Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 3 Mar 2022 14:42:50 -0500
Subject: [PATCH 007/608] Don't create md5 hashes since S3 won't use them or
 create ones to compare with our local one. I'll probably add this back in
 the DRS archiver where we'll have other means to send the hashes.
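
(For context: S3's ETag is only the object's MD5 for simple single-part,
unencrypted uploads; a TransferManager multipart upload returns an ETag like
"9bb58f26192e4ba00f01e2e7b136bbd8-3" - the MD5 of the per-part MD5s plus a
part count - so comparing it to a locally computed MD5 is unreliable.)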
---
 .../impl/S3SubmitToArchiveCommand.java | 25 ++++++++-----------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java
index 07ef5ebb475..2520ace16ed 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java
@@ -84,18 +84,16 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t
                             .replace('.', '-').toLowerCase();
                     String dataciteXml = getDataCiteXml(dv);
                     MessageDigest messageDigest = MessageDigest.getInstance("MD5");
-                    try (ByteArrayInputStream dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8")); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) {
+                    try (ByteArrayInputStream dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8"))) {
                         // Add datacite.xml file
                         ObjectMetadata om = new ObjectMetadata();
                         om.setContentLength(dataciteIn.available());
                         String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber() + ".xml";
-                        tm.upload(new PutObjectRequest(bucketName, dcKey, digestInputStream, om)).waitForCompletion();
-                        String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest());
+                        tm.upload(new PutObjectRequest(bucketName, dcKey, dataciteIn, om)).waitForCompletion();
                         om = s3.getObjectMetadata(bucketName, dcKey);
-                        if (!om.getContentMD5().equals(localchecksum)) {
-                            logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + dcKey);
-                            return new Failure("Error in transferring DataCite.xml file to S3",
-                                    "S3 Submission Failure: incomplete metadata transfer");
+                        if (om == null) {
+                            logger.warning("Could not write datacite xml to S3");
+                            return new Failure("S3 Archiver failed writing datacite xml file");
                         }
@@ -111,17 +109,16 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t
                         if (bagger.generateBag(fileName, false)) {
                             File bagFile = bagger.getBagFile(fileName);
 
-                            try (FileInputStream in = new FileInputStream(bagFile); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest);) {
+                            try (FileInputStream in = new FileInputStream(bagFile)) {
                                 om = new ObjectMetadata();
                                 om.setContentLength(bagFile.length());
 
-                                tm.upload(new PutObjectRequest(bucketName, bagKey, digestInputStream2, om)).waitForCompletion();
-                                localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest());
+                                tm.upload(new PutObjectRequest(bucketName, bagKey, in, om)).waitForCompletion();
                                 om = s3.getObjectMetadata(bucketName, bagKey);
 
-                                if (!om.getContentMD5().equals(localchecksum)) {
-                                    logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + fileName);
+                                if (om == null) {
+                                    logger.severe("Error sending file to S3: " + fileName);
                                     return new Failure("Error in transferring Bag file to S3",
                                             "S3 Submission Failure: incomplete transfer");
                                 }

From 6c97b38164e840db993e4a09a7efb070172ec06c Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 3 Mar 2022 14:43:13 -0500
Subject: [PATCH 008/608] Add QDR-developed version table addition for
 archiving
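
(Context: this surfaces the archival status recorded by the archiver steps
above - the DatasetVersion archivalCopyLocation value - as an extra column in
the dataset page's versions table. A minimal sketch of the idea, with a
hypothetical EL name, would be a superuser-only column whose cell renders
#{version.archivalCopyLocation}.)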
version table addition for archiving --- src/main/webapp/dataset-versions.xhtml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 936c43d07a7..6cb8c11dff7 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -131,7 +131,7 @@ - + @@ -147,6 +147,17 @@ + + + + + + + + + + From 4f71ca4d3dd845082c0f56fde4675f83edc61672 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Mar 2022 15:23:32 -0500 Subject: [PATCH 009/608] initial copy --- .../impl/DRSSubmitToArchiveCommand.java | 235 ++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java new file mode 100644 index 00000000000..92f1c2ff71d --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -0,0 +1,235 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.util.bagit.BagGenerator; +import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import edu.harvard.iq.dataverse.workflow.step.Failure; +import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.security.DigestInputStream; +import java.security.MessageDigest; +import java.util.Map; +import java.util.logging.Logger; + +import javax.json.JsonObject; + +import org.apache.commons.codec.binary.Hex; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSCredentialsProviderChain; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.profile.ProfileCredentialsProvider; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PutObjectRequest; +import com.amazonaws.services.s3.transfer.TransferManager; +import com.amazonaws.services.s3.transfer.TransferManagerBuilder; + +@RequiredPermissions(Permission.PublishDataset) +public class DRSSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { + + private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); + private static final String S3_CONFIG = ":S3ArchivalConfig"; + private static final String S3_PROFILE = ":S3ArchivalProfile"; + + private static final Config config = ConfigProvider.getConfig(); + private AmazonS3 s3 = null; + private 
TransferManager tm = null; + + public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { + super(aRequest, version); + } + + @Override + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { + logger.fine("In DRSSubmitToArchiveCommand..."); + JsonObject configObject = null; + String profileName = requestedSettings.get(S3_PROFILE); + String bucketName = null; + logger.fine("Profile: " + profileName + " Config: " + configObject); + try { + configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG)); + bucketName = configObject.getString("bucket-name", null); + } catch (Exception e) { + logger.warning("Unable to parse " + S3_CONFIG + " setting as a Json object"); + } + if (configObject != null && profileName != null && bucketName != null) { + + s3 = createClient(configObject, profileName); + tm = TransferManagerBuilder.standard() + .withS3Client(s3) + .build(); + try { + + Dataset dataset = dv.getDataset(); + if (dataset.getLockFor(Reason.finalizePublication) == null) { + + String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase(); + String dataciteXml = getDataCiteXml(dv); + MessageDigest messageDigest = MessageDigest.getInstance("MD5"); + try (ByteArrayInputStream dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8")); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { + // Add datacite.xml file + ObjectMetadata om = new ObjectMetadata(); + om.setContentLength(dataciteIn.available()); + String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber()+".xml"; + tm.upload(new PutObjectRequest(bucketName, dcKey, digestInputStream, om)).waitForCompletion(); + String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + om = s3.getObjectMetadata(bucketName, dcKey); + if (!om.getContentMD5().equals(localchecksum)) { + logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + dcKey); + return new Failure("Error in transferring DataCite.xml file to S3", + "S3 Submission Failure: incomplete metadata transfer"); + } + + // Store BagIt file + String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; + String bagKey = spaceName + "/" + fileName; + // Add BagIt ZIP file + // Google uses MD5 as one way to verify the + // transfer + messageDigest = MessageDigest.getInstance("MD5"); + // Generate bag + BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); + bagger.setAuthenticationKey(token.getTokenString()); + if (bagger.generateBag(fileName, false)) { + File bagFile = bagger.getBagFile(fileName); + + try (FileInputStream in = new FileInputStream(bagFile); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest);) { + om = new ObjectMetadata(); + om.setContentLength(bagFile.length()); + + tm.upload(new PutObjectRequest(bucketName, bagKey, digestInputStream2, om)).waitForCompletion(); + localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + om = s3.getObjectMetadata(bucketName, bagKey); + + if (!om.getContentMD5().equals(localchecksum)) { + logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + fileName); + return new Failure("Error in transferring DataCite.xml file to S3", + "S3 Submission Failure: incomplete metadata transfer"); + } + } catch (RuntimeException rte) { + logger.severe("Error 
creating Bag during S3 archiving: " + rte.getMessage()); + return new Failure("Error in generating Bag", + "S3 Submission Failure: archive file not created"); + } + + logger.fine("S3 Submission step: Content Transferred"); + + // Document the location of dataset archival copy location (actually the URL + // where you can + // view it as an admin) + + //Unsigned URL - gives location but not access without creds + dv.setArchivalCopyLocation(s3.getUrl(bucketName, bagKey).toString()); + } else { + logger.warning("Could not write local Bag file " + fileName); + return new Failure("S3 Archiver fail writing temp local bag"); + } + + } + } else { + logger.warning("S3 Archiver Submision Workflow aborted: Dataset locked for publication/pidRegister"); + return new Failure("Dataset locked"); + } + } catch (Exception e) { + logger.warning(e.getLocalizedMessage()); + e.printStackTrace(); + return new Failure("S3 Archiver Submission Failure", + e.getLocalizedMessage() + ": check log for details"); + + } + return WorkflowStepResult.OK; + } else { + return new Failure("GoogleCloud Submission not configured - no \":GoogleCloudBucket\" and/or \":GoogleCloudProject\"."); + } + } + + private AmazonS3 createClient(JsonObject configObject, String profileName) { + // get a standard client, using the standard way of configuration the credentials, etc. + AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); + + ClientConfiguration cc = new ClientConfiguration(); + Integer poolSize = configObject.getInt("connection-pool-size", 256); + cc.setMaxConnections(poolSize); + s3CB.setClientConfiguration(cc); + + /** + * Pass in a URL pointing to your S3 compatible storage. + * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html + */ + String s3CEUrl = configObject.getString("custom-endpoint-url", ""); + /** + * Pass in a region to use for SigV4 signing of requests. + * Defaults to "dataverse" as it is not relevant for custom S3 implementations. + */ + String s3CERegion = configObject.getString("custom-endpoint-region", "dataverse"); + + // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. + if (!s3CEUrl.isEmpty()) { + s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); + } + /** + * Pass in a boolean value if path style access should be used within the S3 client. + * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + */ + Boolean s3pathStyleAccess = configObject.getBoolean("path-style-access", false); + // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false + s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); + + /** + * Pass in a boolean value if payload signing should be used within the S3 client. + * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + */ + Boolean s3payloadSigning = configObject.getBoolean("payload-signing",false); + /** + * Pass in a boolean value if chunked encoding should not be used within the S3 client. + * Anything but case-insensitive "false" will lead to value of true, which is default value, too. + */ + Boolean s3chunkedEncoding = configObject.getBoolean("chunked-encoding",true); + // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. 
default = false + s3CB.setPayloadSigningEnabled(s3payloadSigning); + // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true + // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled + s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); + + /** + * Pass in a string value if this storage driver should use a non-default AWS S3 profile. + * The default is "default" which should work when only one profile exists. + */ + ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(profileName); + + // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env + // vars or system properties to provide these, but use the secrets config source provided by Payara. + AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( + new BasicAWSCredentials( + config.getOptionalValue("dataverse.s3archiver.access-key", String.class).orElse(""), + config.getOptionalValue("dataverse.s3archiver.secret-key", String.class).orElse("") + )); + + // Add both providers to chain - the first working provider will be used (so static credentials are the fallback) + AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(profileCredentials, staticCredentials); + s3CB.setCredentials(providerChain); + + // let's build the client :-) + AmazonS3 client = s3CB.build(); + return client; + } + +} From 041155d20d5840b04d0c4f508e116b663898dcc3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 4 Mar 2022 15:23:53 -0500 Subject: [PATCH 010/608] simple DRS archiver, refactor S3 archiver --- .../impl/DRSSubmitToArchiveCommand.java | 263 ++++++------------ .../impl/S3SubmitToArchiveCommand.java | 195 +++++++------ 2 files changed, 191 insertions(+), 267 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 92f1c2ff71d..52be7dbba6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -2,234 +2,135 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; -import edu.harvard.iq.dataverse.util.bagit.BagGenerator; -import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.security.DigestInputStream; -import java.security.MessageDigest; +import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; import java.util.logging.Logger; +import javax.json.Json; import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; +import 
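
A sketch of the :DRSArchivalConfig setting this reads (the code requires a
"collections" object keyed by collection alias; the matching collection's
entries are merged into the top level of the config JSON that is uploaded
next to the bag - all names and values below other than "collections" are
made up):

  { "drs-endpoint": "https://drs.example.edu/ingest",
    "collections": { "mycollection": { "package-type": "dataset" } } }

The S3 transfer itself still uses the existing :S3ArchivalConfig and
:S3ArchivalProfile settings via the S3SubmitToArchiveCommand superclass.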
---
 .../impl/DRSSubmitToArchiveCommand.java | 263 ++++++----------
 .../impl/S3SubmitToArchiveCommand.java  | 195 +++++++-------
 2 files changed, 191 insertions(+), 267 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java
index 92f1c2ff71d..52be7dbba6e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java
@@ -2,234 +2,135 @@
 
 import edu.harvard.iq.dataverse.Dataset;
 import edu.harvard.iq.dataverse.DatasetVersion;
-import edu.harvard.iq.dataverse.DatasetLock.Reason;
+import edu.harvard.iq.dataverse.Dataverse;
 import edu.harvard.iq.dataverse.authorization.Permission;
 import edu.harvard.iq.dataverse.authorization.users.ApiToken;
 import edu.harvard.iq.dataverse.engine.command.Command;
 import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
 import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
-import edu.harvard.iq.dataverse.util.bagit.BagGenerator;
-import edu.harvard.iq.dataverse.util.bagit.OREMap;
 import edu.harvard.iq.dataverse.util.json.JsonUtil;
 import edu.harvard.iq.dataverse.workflow.step.Failure;
 import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult;
 
 import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.security.DigestInputStream;
-import java.security.MessageDigest;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
 import java.util.logging.Logger;
 
+import javax.json.Json;
 import javax.json.JsonObject;
+import javax.json.JsonObjectBuilder;
+import javax.json.JsonValue;
 
-import org.apache.commons.codec.binary.Hex;
 import org.eclipse.microprofile.config.Config;
 import org.eclipse.microprofile.config.ConfigProvider;
 
-import com.amazonaws.ClientConfiguration;
-import com.amazonaws.auth.AWSCredentialsProviderChain;
-import com.amazonaws.auth.AWSStaticCredentialsProvider;
-import com.amazonaws.auth.BasicAWSCredentials;
-import com.amazonaws.auth.profile.ProfileCredentialsProvider;
-import com.amazonaws.client.builder.AwsClientBuilder;
 import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
 import com.amazonaws.services.s3.model.ObjectMetadata;
 import com.amazonaws.services.s3.model.PutObjectRequest;
 import com.amazonaws.services.s3.transfer.TransferManager;
-import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
 
 @RequiredPermissions(Permission.PublishDataset)
-public class DRSSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command<DatasetVersion> {
+public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command<DatasetVersion> {
 
     private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName());
-    private static final String S3_CONFIG = ":S3ArchivalConfig";
-    private static final String S3_PROFILE = ":S3ArchivalProfile";
+    private static final String DRS_CONFIG = ":DRSArchivalConfig";
 
     private static final Config config = ConfigProvider.getConfig();
-    private AmazonS3 s3 = null;
-    private TransferManager tm = null;
 
     public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) {
         super(aRequest, version);
     }
 
     @Override
-    public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map<String, String> requestedSettings) {
+    public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token,
+            Map<String, String> requestedSettings) {
         logger.fine("In DRSSubmitToArchiveCommand...");
-        JsonObject configObject = null;
-        String profileName = requestedSettings.get(S3_PROFILE);
-        String bucketName = null;
-        logger.fine("Profile: " + profileName + " Config: " + configObject);
+        JsonObject drsConfigObject = null;
+
         try {
-            configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG));
-            bucketName = configObject.getString("bucket-name", null);
+            drsConfigObject = JsonUtil.getJsonObject(requestedSettings.get(DRS_CONFIG));
         } catch (Exception e) {
-            logger.warning("Unable to parse " + S3_CONFIG + " setting as a Json object");
+            logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object");
         }
-        if (configObject != null && profileName != null && bucketName != null) {
+        if (drsConfigObject != null) {
+            Set<String> collections = drsConfigObject.getJsonObject("collections").keySet();
+            Dataset dataset = dv.getDataset();
+            Dataverse ancestor = dataset.getOwner();
+            String alias = ancestor.getAlias();
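+            // walk up the owner hierarchy until we reach a collection that has an
+            // entry in the "collections" config (or run out of ancestors)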
ancestor = ancestor.getOwner(); + if (ancestor != null) { + alias = ancestor.getAlias(); + } else { + alias = null; + } + } + if (alias != null) { + JsonObject collectionConfig = drsConfigObject.getJsonObject("collections").getJsonObject(alias); + + WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); + + if (s3Result == WorkflowStepResult.OK) { + // Now contact DRS + JsonObjectBuilder job = Json.createObjectBuilder(drsConfigObject); + job.remove("collections"); + for (Entry entry : collectionConfig.entrySet()) { + job.add(entry.getKey(), entry.getValue()); + } + + String drsConfigString = JsonUtil.prettyPrint(job.build()); + try (ByteArrayInputStream configIn = new ByteArrayInputStream(drsConfigString.getBytes("UTF-8"))) { // Add datacite.xml file ObjectMetadata om = new ObjectMetadata(); - om.setContentLength(dataciteIn.available()); - String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber()+".xml"; - tm.upload(new PutObjectRequest(bucketName, dcKey, digestInputStream, om)).waitForCompletion(); - String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + om.setContentLength(configIn.available()); + String dcKey = getSpaceName(dataset) + "/drsConfig." + getSpaceName(dataset) + "_v" + + dv.getFriendlyVersionNumber() + ".json"; + tm.upload(new PutObjectRequest(bucketName, dcKey, configIn, om)).waitForCompletion(); om = s3.getObjectMetadata(bucketName, dcKey); - if (!om.getContentMD5().equals(localchecksum)) { - logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + dcKey); - return new Failure("Error in transferring DataCite.xml file to S3", - "S3 Submission Failure: incomplete metadata transfer"); - } - - // Store BagIt file - String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; - String bagKey = spaceName + "/" + fileName; - // Add BagIt ZIP file - // Google uses MD5 as one way to verify the - // transfer - messageDigest = MessageDigest.getInstance("MD5"); - // Generate bag - BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); - bagger.setAuthenticationKey(token.getTokenString()); - if (bagger.generateBag(fileName, false)) { - File bagFile = bagger.getBagFile(fileName); - - try (FileInputStream in = new FileInputStream(bagFile); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest);) { - om = new ObjectMetadata(); - om.setContentLength(bagFile.length()); - - tm.upload(new PutObjectRequest(bucketName, bagKey, digestInputStream2, om)).waitForCompletion(); - localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); - om = s3.getObjectMetadata(bucketName, bagKey); - - if (!om.getContentMD5().equals(localchecksum)) { - logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + fileName); - return new Failure("Error in transferring DataCite.xml file to S3", - "S3 Submission Failure: incomplete metadata transfer"); - } - } catch (RuntimeException rte) { - logger.severe("Error creating Bag during S3 archiving: " + rte.getMessage()); - return new Failure("Error in generating Bag", - "S3 Submission Failure: archive file not created"); - } - - logger.fine("S3 Submission step: Content Transferred"); - - // Document the location of dataset archival copy location (actually the URL - // where you can - // view it as an admin) - - //Unsigned URL - gives location but not access without creds - dv.setArchivalCopyLocation(s3.getUrl(bucketName, 
bagKey).toString()); - } else { - logger.warning("Could not write local Bag file " + fileName); - return new Failure("S3 Archiver fail writing temp local bag"); + } catch (RuntimeException rte) { + logger.warning("Error creating DRS Config file during DRS archiving: " + rte.getMessage()); + return new Failure("Error in generating Config file", + "DRS Submission Failure: config file not created"); + } catch (InterruptedException e) { + logger.warning("DRS Archiver failure: " + e.getLocalizedMessage()); + e.printStackTrace(); + return new Failure("DRS Archiver fail in config transfer"); + } catch (UnsupportedEncodingException e1) { + logger.warning("UTF-8 not supported!"); + } catch (IOException e1) { + logger.warning("Failure creating ByteArrayInputStream from string!"); } - - } - } else { - logger.warning("S3 Archiver Submision Workflow aborted: Dataset locked for publication/pidRegister"); - return new Failure("Dataset locked"); + + logger.fine("DRS Submission step: Config Transferred"); + + // Document the location of dataset archival copy location (actually the URL + // where you can + // view it as an admin) + + // Unsigned URL - gives location but not access without creds + } else { + + logger.warning("DRS: S3 archiving failed - will not send config: " + getSpaceName(dataset) + "_v" + + dv.getFriendlyVersionNumber()); + return new Failure("DRS Archiver fail in initial S3 Archiver transfer"); } - } catch (Exception e) { - logger.warning(e.getLocalizedMessage()); - e.printStackTrace(); - return new Failure("S3 Archiver Submission Failure", - e.getLocalizedMessage() + ": check log for details"); + } else { + logger.fine("DRS Archiver: No matching collection found - will not archive: " + getSpaceName(dataset) + + "_v" + dv.getFriendlyVersionNumber()); + return WorkflowStepResult.OK; } - return WorkflowStepResult.OK; - } else { - return new Failure("GoogleCloud Submission not configured - no \":GoogleCloudBucket\" and/or \":GoogleCloudProject\"."); - } - } - private AmazonS3 createClient(JsonObject configObject, String profileName) { - // get a standard client, using the standard way of configuration the credentials, etc. - AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); - - ClientConfiguration cc = new ClientConfiguration(); - Integer poolSize = configObject.getInt("connection-pool-size", 256); - cc.setMaxConnections(poolSize); - s3CB.setClientConfiguration(cc); - - /** - * Pass in a URL pointing to your S3 compatible storage. - * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html - */ - String s3CEUrl = configObject.getString("custom-endpoint-url", ""); - /** - * Pass in a region to use for SigV4 signing of requests. - * Defaults to "dataverse" as it is not relevant for custom S3 implementations. - */ - String s3CERegion = configObject.getString("custom-endpoint-region", "dataverse"); - - // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. - if (!s3CEUrl.isEmpty()) { - s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); + } else { + logger.warning(DRS_CONFIG + " not found"); + return new Failure("DRS Submission not configured - no " + DRS_CONFIG + " found."); } - /** - * Pass in a boolean value if path style access should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. 
- */ - Boolean s3pathStyleAccess = configObject.getBoolean("path-style-access", false); - // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false - s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); - - /** - * Pass in a boolean value if payload signing should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. - */ - Boolean s3payloadSigning = configObject.getBoolean("payload-signing",false); - /** - * Pass in a boolean value if chunked encoding should not be used within the S3 client. - * Anything but case-insensitive "false" will lead to value of true, which is default value, too. - */ - Boolean s3chunkedEncoding = configObject.getBoolean("chunked-encoding",true); - // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false - s3CB.setPayloadSigningEnabled(s3payloadSigning); - // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true - // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled - s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); - - /** - * Pass in a string value if this storage driver should use a non-default AWS S3 profile. - * The default is "default" which should work when only one profile exists. - */ - ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(profileName); - - // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env - // vars or system properties to provide these, but use the secrets config source provided by Payara. - AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( - new BasicAWSCredentials( - config.getOptionalValue("dataverse.s3archiver.access-key", String.class).orElse(""), - config.getOptionalValue("dataverse.s3archiver.secret-key", String.class).orElse("") - )); - - // Add both providers to chain - the first working provider will be used (so static credentials are the fallback) - AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(profileCredentials, staticCredentials); - s3CB.setCredentials(providerChain); - - // let's build the client :-) - AmazonS3 client = s3CB.build(); - return client; + return WorkflowStepResult.OK; } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 2520ace16ed..e352caa6944 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -49,49 +49,49 @@ public class S3SubmitToArchiveCommand extends AbstractSubmitToArchiveCommand imp private static final String S3_PROFILE = ":S3ArchivalProfile"; private static final Config config = ConfigProvider.getConfig(); - private AmazonS3 s3 = null; - private TransferManager tm = null; - + protected AmazonS3 s3 = null; + protected TransferManager tm = null; + private String spaceName = null; + protected String bucketName = null; + public S3SubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { + public WorkflowStepResult 
performArchiveSubmission(DatasetVersion dv, ApiToken token, + Map requestedSettings) { logger.fine("In S3SubmitToArchiveCommand..."); JsonObject configObject = null; String profileName = requestedSettings.get(S3_PROFILE); - String bucketName = null; + logger.fine("Profile: " + profileName + " Config: " + configObject); try { - configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG)); - bucketName = configObject.getString("bucket-name", null); + configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG)); + bucketName = configObject.getString("bucket-name", null); } catch (Exception e) { logger.warning("Unable to parse " + S3_CONFIG + " setting as a Json object"); } if (configObject != null && profileName != null && bucketName != null) { s3 = createClient(configObject, profileName); - tm = TransferManagerBuilder.standard() - .withS3Client(s3) - .build(); + tm = TransferManagerBuilder.standard().withS3Client(s3).build(); try { Dataset dataset = dv.getDataset(); if (dataset.getLockFor(Reason.finalizePublication) == null) { - String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase(); + spaceName = getSpaceName(dataset); String dataciteXml = getDataCiteXml(dv); - MessageDigest messageDigest = MessageDigest.getInstance("MD5"); try (ByteArrayInputStream dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8"))) { // Add datacite.xml file ObjectMetadata om = new ObjectMetadata(); om.setContentLength(dataciteIn.available()); - String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber()+".xml"; + String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber() + + ".xml"; tm.upload(new PutObjectRequest(bucketName, dcKey, dataciteIn, om)).waitForCompletion(); om = s3.getObjectMetadata(bucketName, dcKey); - if(om==null) { + if (om == null) { logger.warning("Could not write datacite xml to S3"); return new Failure("S3 Archiver failed writing datacite xml file"); } @@ -102,47 +102,48 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Add BagIt ZIP file // Google uses MD5 as one way to verify the // transfer - messageDigest = MessageDigest.getInstance("MD5"); - // Generate bag - BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); - bagger.setAuthenticationKey(token.getTokenString()); - if (bagger.generateBag(fileName, false)) { - File bagFile = bagger.getBagFile(fileName); - - try (FileInputStream in = new FileInputStream(bagFile)) { - om = new ObjectMetadata(); - om.setContentLength(bagFile.length()); - - tm.upload(new PutObjectRequest(bucketName, bagKey, in, om)).waitForCompletion(); - om = s3.getObjectMetadata(bucketName, bagKey); - - if (om ==null) { - logger.severe("Error sending file to S3: " + fileName); - return new Failure("Error in transferring Bag file to S3", - "S3 Submission Failure: incomplete transfer"); - } - } catch (RuntimeException rte) { - logger.severe("Error creating Bag during S3 archiving: " + rte.getMessage()); - return new Failure("Error in generating Bag", - "S3 Submission Failure: archive file not created"); - } - logger.fine("S3 Submission step: Content Transferred"); - - // Document the location of dataset archival copy location (actually the URL - // where you can - // view it as an admin) + // Generate bag + BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); + bagger.setAuthenticationKey(token.getTokenString()); + if 
(bagger.generateBag(fileName, false)) { + File bagFile = bagger.getBagFile(fileName); + + try (FileInputStream in = new FileInputStream(bagFile)) { + om = new ObjectMetadata(); + om.setContentLength(bagFile.length()); + + tm.upload(new PutObjectRequest(bucketName, bagKey, in, om)).waitForCompletion(); + om = s3.getObjectMetadata(bucketName, bagKey); + + if (om == null) { + logger.severe("Error sending file to S3: " + fileName); + return new Failure("Error in transferring Bag file to S3", + "S3 Submission Failure: incomplete transfer"); + } + } catch (RuntimeException rte) { + logger.severe("Error creating Bag during S3 archiving: " + rte.getMessage()); + return new Failure("Error in generating Bag", + "S3 Submission Failure: archive file not created"); + } + + logger.fine("S3 Submission step: Content Transferred"); + + // Document the location of dataset archival copy location (actually the URL + // where you can + // view it as an admin) + + // Unsigned URL - gives location but not access without creds + dv.setArchivalCopyLocation(s3.getUrl(bucketName, bagKey).toString()); + } else { + logger.warning("Could not write local Bag file " + fileName); + return new Failure("S3 Archiver fail writing temp local bag"); + } - //Unsigned URL - gives location but not access without creds - dv.setArchivalCopyLocation(s3.getUrl(bucketName, bagKey).toString()); - } else { - logger.warning("Could not write local Bag file " + fileName); - return new Failure("S3 Archiver fail writing temp local bag"); } - - } - } else { - logger.warning("S3 Archiver Submision Workflow aborted: Dataset locked for publication/pidRegister"); + } else { + logger.warning( + "S3 Archiver Submision Workflow aborted: Dataset locked for publication/pidRegister"); return new Failure("Dataset locked"); } } catch (Exception e) { @@ -154,78 +155,100 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } return WorkflowStepResult.OK; } else { - return new Failure("GoogleCloud Submission not configured - no \":GoogleCloudBucket\" and/or \":GoogleCloudProject\"."); + return new Failure( + "S3 Submission not configured - no \":S3ArchivalProfile\" and/or \":S3ArchivalConfig\" or no bucket-name defined in config."); } } + protected String getSpaceName(Dataset dataset) { + if (spaceName == null) { + spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-').replace('.', '-') + .toLowerCase(); + } + return spaceName; + } + private AmazonS3 createClient(JsonObject configObject, String profileName) { - // get a standard client, using the standard way of configuration the credentials, etc. + // get a standard client, using the standard way of configuration the + // credentials, etc. AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); ClientConfiguration cc = new ClientConfiguration(); Integer poolSize = configObject.getInt("connection-pool-size", 256); cc.setMaxConnections(poolSize); s3CB.setClientConfiguration(cc); - + /** - * Pass in a URL pointing to your S3 compatible storage. - * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html + * Pass in a URL pointing to your S3 compatible storage. For possible values see + * https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html */ String s3CEUrl = configObject.getString("custom-endpoint-url", ""); /** - * Pass in a region to use for SigV4 signing of requests. 
- * Defaults to "dataverse" as it is not relevant for custom S3 implementations. + * Pass in a region to use for SigV4 signing of requests. Defaults to + * "dataverse" as it is not relevant for custom S3 implementations. */ String s3CERegion = configObject.getString("custom-endpoint-region", "dataverse"); - // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. + // if the admin has set a system property (see below) we use this endpoint URL + // instead of the standard ones. if (!s3CEUrl.isEmpty()) { s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); } /** - * Pass in a boolean value if path style access should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + * Pass in a boolean value if path style access should be used within the S3 + * client. Anything but case-insensitive "true" will lead to value of false, + * which is default value, too. */ Boolean s3pathStyleAccess = configObject.getBoolean("path-style-access", false); - // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false + // some custom S3 implementations require "PathStyleAccess" as they us a path, + // not a subdomain. default = false s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); /** - * Pass in a boolean value if payload signing should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + * Pass in a boolean value if payload signing should be used within the S3 + * client. Anything but case-insensitive "true" will lead to value of false, + * which is default value, too. */ - Boolean s3payloadSigning = configObject.getBoolean("payload-signing",false); + Boolean s3payloadSigning = configObject.getBoolean("payload-signing", false); /** - * Pass in a boolean value if chunked encoding should not be used within the S3 client. - * Anything but case-insensitive "false" will lead to value of true, which is default value, too. + * Pass in a boolean value if chunked encoding should not be used within the S3 + * client. Anything but case-insensitive "false" will lead to value of true, + * which is default value, too. */ - Boolean s3chunkedEncoding = configObject.getBoolean("chunked-encoding",true); - // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false + Boolean s3chunkedEncoding = configObject.getBoolean("chunked-encoding", true); + // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. + // default = false s3CB.setPayloadSigningEnabled(s3payloadSigning); - // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true - // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled + // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. + // default = true + // Boolean is inverted, otherwise setting + // dataverse.files..chunked-encoding=false would result in leaving Chunked + // Encoding enabled s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); /** - * Pass in a string value if this storage driver should use a non-default AWS S3 profile. - * The default is "default" which should work when only one profile exists. + * Pass in a string value if this storage driver should use a non-default AWS S3 + * profile. 
The default is "default" which should work when only one profile + * exists. */ ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(profileName); - // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env - // vars or system properties to provide these, but use the secrets config source provided by Payara. - AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( - new BasicAWSCredentials( + // Try to retrieve credentials via Microprofile Config API, too. For production + // use, you should not use env + // vars or system properties to provide these, but use the secrets config source + // provided by Payara. + AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider(new BasicAWSCredentials( config.getOptionalValue("dataverse.s3archiver.access-key", String.class).orElse(""), - config.getOptionalValue("dataverse.s3archiver.secret-key", String.class).orElse("") - )); - - // Add both providers to chain - the first working provider will be used (so static credentials are the fallback) - AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(profileCredentials, staticCredentials); + config.getOptionalValue("dataverse.s3archiver.secret-key", String.class).orElse(""))); + + // Add both providers to chain - the first working provider will be used (so + // static credentials are the fallback) + AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(profileCredentials, + staticCredentials); s3CB.setCredentials(providerChain); - + // let's build the client :-) - AmazonS3 client = s3CB.build(); + AmazonS3 client = s3CB.build(); return client; } From 1dbdc1b85d8c4e063582b2aa42e233efee594272 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 4 Mar 2022 16:41:35 -0500 Subject: [PATCH 011/608] fix validation of file --- .../edu/harvard/iq/dataverse/util/bagit/BagGenerator.java | 4 ++-- .../harvard/iq/dataverse/util/bagit/BagValidationJob.java | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 958e61f33e6..47f0287f18e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -903,8 +903,8 @@ public void incrementTotalDataSize(long inc) { totalDataSize += inc; } - public String getHashtype() { - return hashtype.toString(); + public ChecksumType getHashtype() { + return hashtype; } // Get's all "Has Part" children, standardized to send an array with 0,1, or diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java index 7a32b96f4a0..fb5507c1b56 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java @@ -25,6 +25,7 @@ import org.apache.commons.compress.archivers.zip.ZipFile; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataFile.ChecksumType; import org.apache.commons.compress.utils.IOUtils; @@ -41,7 +42,7 @@ public class BagValidationJob implements Runnable { private String hash; private String name; - private static String hashtype; + private static ChecksumType hashtype; public BagValidationJob(String value, String key) throws 
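/* The jobs queued by this class recompute each zip entry's checksum with
 * commons-codec and compare it against the manifest value captured at bag
 * generation time. A minimal standalone sketch of the same computation
 * (assuming a SHA-1 manifest; exception handling omitted):
 *
 *   import java.io.InputStream;
 *   import org.apache.commons.codec.digest.DigestUtils;
 *
 *   String recompute(InputStream in) throws java.io.IOException {
 *       return DigestUtils.sha1Hex(in);   // DigestUtils.md5Hex(in) for MD5
 *   }
 */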
IllegalStateException { if (zf == null || bagGenerator == null) { @@ -64,7 +65,7 @@ public void run() { if (hash.equals(realHash)) { log.fine("Valid hash for " + name); } else { - log.severe("Invalid " + bagGenerator.getHashtype() + " for " + name); + log.severe("Invalid " + bagGenerator.getHashtype().name() + " for " + name); log.fine("As sent: " + hash); log.fine("As calculated: " + realHash); } @@ -89,7 +90,7 @@ private String generateFileHash(String name, ZipFile zf) { } else if (hashtype.equals(DataFile.ChecksumType.MD5)) { realHash = DigestUtils.md5Hex(inputStream); } else { - log.warning("Unknown hash type: " + hashtype); + log.warning("Unknown hash type: " + hashtype.name()); } } catch (ZipException e) { From 7c8b18fd5a45d312fbe7fd06e930b61d0fa7950f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 7 Mar 2022 13:29:00 -0500 Subject: [PATCH 012/608] Use separate ZipFile in executor, fix path issue breaking validation --- .../iq/dataverse/util/bagit/BagGenerator.java | 65 +++++++++++-------- .../util/bagit/BagValidationJob.java | 17 +++-- 2 files changed, 52 insertions(+), 30 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 47f0287f18e..118b44e0b58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -359,6 +359,7 @@ public boolean generateBag(String bagName, boolean temp) { // Create an output stream backed by the file bagFileOS = new FileOutputStream(bagFile); if (generateBag(bagFileOS)) { + //The generateBag call sets this.bagName to the correct value validateBagFile(bagFile); if (usetemp) { logger.fine("Moving tmp zip"); @@ -384,7 +385,8 @@ public void validateBag(String bagId) { ZipFile zf = null; InputStream is = null; try { - zf = new ZipFile(getBagFile(bagId)); + File bagFile = getBagFile(bagId); + zf = new ZipFile(bagFile); ZipArchiveEntry entry = zf.getEntry(getValidName(bagId) + "/manifest-sha1.txt"); if (entry != null) { logger.info("SHA1 hashes used"); @@ -424,7 +426,7 @@ public void validateBag(String bagId) { } IOUtils.closeQuietly(is); logger.info("HashMap Map contains: " + checksumMap.size() + " entries"); - checkFiles(checksumMap, zf); + checkFiles(checksumMap, bagFile); } catch (IOException io) { logger.log(Level.SEVERE,"Could not validate Hashes", io); } catch (Exception e) { @@ -453,14 +455,14 @@ public File getBagFile(String bagID) throws Exception { private void validateBagFile(File bagFile) throws IOException { // Run a confirmation test - should verify all files and hashes - ZipFile zf = new ZipFile(bagFile); + // Check files calculates the hashes and file sizes and reports on // whether hashes are correct - checkFiles(checksumMap, zf); + checkFiles(checksumMap, bagFile); logger.info("Data Count: " + dataCount); logger.info("Data Size: " + totalDataSize); - zf.close(); + //zf.close(); } public static String getValidName(String bagName) { @@ -477,7 +479,7 @@ private void processContainer(JsonObject item, String currentPath) throws IOExce } else if (item.has(JsonLDTerm.schemaOrg("name").getLabel())) { title = item.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString(); } - + logger.fine("Adding " + title + "/ to path " + currentPath); currentPath = currentPath + title + "/"; int containerIndex = -1; try { @@ -553,6 +555,7 @@ private void processContainer(JsonObject item, String currentPath) throws IOExce 
logger.warning("Duplicate/Collision: " + child.get("@id").getAsString() + " has SHA1 Hash: " + childHash + " in: " + bagID); } + logger.fine("Adding " + childPath + " with hash " + childHash + " to checksumMap"); checksumMap.put(childPath, childHash); } } @@ -696,29 +699,39 @@ private void createFileFromURL(final String relPath, final String uri) addEntry(archiveEntry, supp); } - private void checkFiles(HashMap shaMap, ZipFile zf) { + private void checkFiles(HashMap shaMap, File bagFile) { ExecutorService executor = Executors.newFixedThreadPool(numConnections); - BagValidationJob.setZipFile(zf); - BagValidationJob.setBagGenerator(this); - logger.fine("Validating hashes for zipped data files"); - int i = 0; - for (Entry entry : shaMap.entrySet()) { - BagValidationJob vj = new BagValidationJob(entry.getValue(), entry.getKey()); - executor.execute(vj); - i++; - if (i % 1000 == 0) { - logger.info("Queuing Hash Validations: " + i); - } - } - logger.fine("All Hash Validations Queued: " + i); - - executor.shutdown(); + ZipFile zf = null; try { - while (!executor.awaitTermination(10, TimeUnit.MINUTES)) { - logger.fine("Awaiting completion of hash calculations."); + zf = new ZipFile(bagFile); + + BagValidationJob.setZipFile(zf); + BagValidationJob.setBagGenerator(this); + logger.fine("Validating hashes for zipped data files"); + int i = 0; + for (Entry entry : shaMap.entrySet()) { + BagValidationJob vj = new BagValidationJob(bagName, entry.getValue(), entry.getKey()); + executor.execute(vj); + i++; + if (i % 1000 == 0) { + logger.info("Queuing Hash Validations: " + i); + } } - } catch (InterruptedException e) { - logger.log(Level.SEVERE,"Hash Calculations interrupted", e); + logger.fine("All Hash Validations Queued: " + i); + + executor.shutdown(); + try { + while (!executor.awaitTermination(10, TimeUnit.MINUTES)) { + logger.fine("Awaiting completion of hash calculations."); + } + } catch (InterruptedException e) { + logger.log(Level.SEVERE, "Hash Calculations interrupted", e); + } + } catch (IOException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + } finally { + IOUtils.closeQuietly(zf); } logger.fine("Hash Validations Completed"); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java index fb5507c1b56..7ac9fd701b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java @@ -42,13 +42,15 @@ public class BagValidationJob implements Runnable { private String hash; private String name; + private String basePath; private static ChecksumType hashtype; - public BagValidationJob(String value, String key) throws IllegalStateException { + public BagValidationJob(String bagName, String value, String key) throws IllegalStateException { if (zf == null || bagGenerator == null) { throw new IllegalStateException( "Static Zipfile and BagGenerator must be set before creating ValidationJobs"); } + basePath=bagName; hash = value; name = key; @@ -61,7 +63,7 @@ public BagValidationJob(String value, String key) throws IllegalStateException { */ public void run() { - String realHash = generateFileHash(name, zf); + String realHash = generateFileHash(basePath + "/" + name, zf); if (hash.equals(realHash)) { log.fine("Valid hash for " + name); } else { @@ -73,12 +75,16 @@ public void run() { private String generateFileHash(String name, ZipFile zf) { + String realHash = null; + ZipArchiveEntry 
archiveEntry1 = zf.getEntry(name); + + if(archiveEntry1 != null) { // Error check - add file sizes to compare against supplied stats - + log.fine("Getting stream for " + name); long start = System.currentTimeMillis(); InputStream inputStream = null; - String realHash = null; + try { inputStream = zf.getInputStream(archiveEntry1); if (hashtype.equals(DataFile.ChecksumType.SHA1)) { @@ -105,6 +111,9 @@ private String generateFileHash(String name, ZipFile zf) { log.fine("Retrieve/compute time = " + (System.currentTimeMillis() - start) + " ms"); // Error check - add file sizes to compare against supplied stats bagGenerator.incrementTotalDataSize(archiveEntry1.getSize()); + } else { + log.warning("Entry " + name + " not found in zipped bag: not validated"); + } return realHash; } From 4892ab8419f3752e4fa6022efb8580a144b6beef Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 7 Mar 2022 13:31:23 -0500 Subject: [PATCH 013/608] update commons-codec probably not required - good practice/helped in debugging to be able to check sourcecode online which is for the current version. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index dc3a0111bf5..a92ff1d259e 100644 --- a/pom.xml +++ b/pom.xml @@ -542,7 +542,7 @@ commons-codec commons-codec - 1.9 + 1.15 From f63bbd859278b8b5d28ba22c07aae370a8c12984 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 7 Mar 2022 13:33:19 -0500 Subject: [PATCH 014/608] fix bag name, remove digest imports note that the write to file part of the bag generation already tests the hash values of the individual files internally. --- .../engine/command/impl/S3SubmitToArchiveCommand.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index e352caa6944..3009e422037 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -17,14 +17,11 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; -import java.security.DigestInputStream; -import java.security.MessageDigest; import java.util.Map; import java.util.logging.Logger; import javax.json.JsonObject; -import org.apache.commons.codec.binary.Hex; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; @@ -97,8 +94,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } // Store BagIt file - String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; - String bagKey = spaceName + "/" + fileName; + String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber(); + String bagKey = spaceName + "/" + fileName + ".zip"; // Add BagIt ZIP file // Google uses MD5 as one way to verify the // transfer From 2a6042998302dc92170c86bbc32f127f6782a619 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 7 Mar 2022 13:34:16 -0500 Subject: [PATCH 015/608] refactor, add isArchivable method --- .../impl/AbstractSubmitToArchiveCommand.java | 8 +-- .../impl/DRSSubmitToArchiveCommand.java | 53 ++++++++++++------- 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index a235dd57d91..e919f81e6e9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -2,8 +2,9 @@ import edu.harvard.iq.dataverse.DOIDataCiteRegisterService; import edu.harvard.iq.dataverse.DataCitation; +import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -15,8 +16,6 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; -import java.nio.charset.Charset; -import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.logging.Logger; @@ -83,4 +82,7 @@ String getDataCiteXml(DatasetVersion dv) { dv.getDataset()); } + public static boolean isArchivable(Dataset dataset, SettingsWrapper settingsWrapper) { + return true; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 52be7dbba6e..e82fe66c8c8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.Command; @@ -25,13 +26,8 @@ import javax.json.JsonObjectBuilder; import javax.json.JsonValue; -import org.eclipse.microprofile.config.Config; -import org.eclipse.microprofile.config.ConfigProvider; - -import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.transfer.TransferManager; @RequiredPermissions(Permission.PublishDataset) public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { @@ -39,10 +35,6 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implemen private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); private static final String DRS_CONFIG = ":DRSArchivalConfig"; - private static final Config config = ConfigProvider.getConfig(); - private AmazonS3 s3 = null; - private TransferManager tm = null; - public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } @@ -62,15 +54,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Set collections = drsConfigObject.getJsonObject("collections").keySet(); Dataset dataset = dv.getDataset(); Dataverse ancestor = dataset.getOwner(); - String alias = ancestor.getAlias(); - while (ancestor != null && !collections.contains(alias)) { - ancestor = ancestor.getOwner(); - if 
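/* The ownership walk being factored out here climbs from the dataset's
 * collection up through its parents until it reaches an alias listed under
 * "collections" in the :DRSArchivalConfig setting. A hypothetical value
 * showing the shape being matched (aliases and endpoint are invented, and
 * the per-collection objects may carry further settings):
 *
 *   { "DRSendpoint": "https://drs.example.edu/ingest",
 *     "collections": { "physics": { }, "astronomy": { } } }
 *
 * A dataset anywhere under "physics" archives with that collection's
 * settings; datasets outside both trees are skipped.
 */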
(ancestor != null) { - alias = ancestor.getAlias(); - } else { - alias = null; - } - } + String alias = getArchivableAncestor(ancestor, collections); + if (alias != null) { JsonObject collectionConfig = drsConfigObject.getJsonObject("collections").getJsonObject(alias); @@ -95,6 +80,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t om = s3.getObjectMetadata(bucketName, dcKey); } catch (RuntimeException rte) { logger.warning("Error creating DRS Config file during DRS archiving: " + rte.getMessage()); + rte.printStackTrace(); return new Failure("Error in generating Config file", "DRS Submission Failure: config file not created"); } catch (InterruptedException e) { @@ -133,4 +119,35 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } return WorkflowStepResult.OK; } + + private static String getArchivableAncestor(Dataverse ancestor, Set collections) { + String alias = ancestor.getAlias(); + while (ancestor != null && !collections.contains(alias)) { + ancestor = ancestor.getOwner(); + if (ancestor != null) { + alias = ancestor.getAlias(); + } else { + alias = null; + } + } + return null; + } + + public static boolean isArchivable(Dataset d, SettingsWrapper sw) { + JsonObject drsConfigObject = null; + + try { + String config = sw.get(DRS_CONFIG, null); + if(config!=null) { + drsConfigObject = JsonUtil.getJsonObject(config); + } + } catch (Exception e) { + logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); + } + if (drsConfigObject != null) { + Set collections = drsConfigObject.getJsonObject("collections").keySet(); + return getArchivableAncestor(d.getOwner(),collections)!=null; + } + return false; + } } From 63cf130f82ef0dea579c3038fe4bd012c4eac038 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 7 Mar 2022 13:35:17 -0500 Subject: [PATCH 016/608] Reflexive call of isArchivable method on archive provider class --- .../edu/harvard/iq/dataverse/DatasetPage.java | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 61720efafb2..8d31c3895ea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -62,6 +62,8 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.time.Instant; @@ -5478,10 +5480,8 @@ public void refreshPaginator() { */ public void archiveVersion(Long id) { if (session.getUser() instanceof AuthenticatedUser) { - AuthenticatedUser au = ((AuthenticatedUser) session.getUser()); - DatasetVersion dv = datasetVersionService.retrieveDatasetVersionByVersionId(id).getDatasetVersion(); - String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); + String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); if (cmd != null) { try { @@ -5505,6 +5505,24 @@ public void archiveVersion(Long id) { } } } + + boolean isArchiveable() { + String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); + if (className != null) { + try { + Class clazz 
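/* The reflective lookup here defines an informal contract: a configured
 * archiver class may expose a public static isArchivable(Dataset,
 * SettingsWrapper) method, which the page invokes without compile-time
 * knowledge of the class. A minimal sketch of a conforming archiver
 * (hypothetical class; constructor omitted):
 *
 *   public class ExampleSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand {
 *       public static boolean isArchivable(Dataset dataset, SettingsWrapper settingsWrapper) {
 *           // e.g. only offer archiving when this archiver's config is set
 *           return settingsWrapper.get(":ExampleArchivalConfig", null) != null;
 *       }
 *   }
 */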
= Class.forName(className); + + Method m = clazz.getMethod("isArchivable", Dataset.class, SettingsWrapper.class); + Object[] params = { dataset, settingsWrapper }; + return (Boolean) m.invoke(null, params); + } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException + | InvocationTargetException | NoSuchMethodException | SecurityException e) { + logger.warning("Failed to call is Archivable on configured archiver class: " + className); + e.printStackTrace(); + } + } + return false; + } private static Date getFileDateToCompare(FileMetadata fileMetadata) { DataFile datafile = fileMetadata.getDataFile(); From 7852cc4ccd0b5af64e381921bbab5b8fcfd28152 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 7 Mar 2022 13:37:03 -0500 Subject: [PATCH 017/608] only display archiving column when needed there are archived copies, or one can archive this dataset --- src/main/webapp/dataset-versions.xhtml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 6cb8c11dff7..c9c90d17619 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -147,12 +147,12 @@ - + - From e3da7576530ff9b371a5a5300a1f25e719511d81 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 9 Mar 2022 10:03:19 -0500 Subject: [PATCH 018/608] typos in per-collection display logic --- src/main/webapp/dataset-versions.xhtml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index c9c90d17619..e105ac30df7 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -147,12 +147,12 @@ - + - From 54557ada77ee912ded47ed3549f8b6dfbd4cd083 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 9 Mar 2022 17:29:22 -0500 Subject: [PATCH 019/608] Bug fix in refactored collection check --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index e82fe66c8c8..cb88f9e030e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -130,7 +130,7 @@ private static String getArchivableAncestor(Dataverse ancestor, Set coll alias = null; } } - return null; + return alias; } public static boolean isArchivable(Dataset d, SettingsWrapper sw) { From a04c1be0f08bd8adf6b87bab497cf73ea187cc55 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 9 Mar 2022 17:29:41 -0500 Subject: [PATCH 020/608] make isArchivable public so it can be used in .xhtml --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 18e051946e8..919a5c50666 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -5506,12 +5506,11 @@ public void archiveVersion(Long id) { } } - boolean isArchiveable() { + public boolean isArchivable() { String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); if (className != null) 
{ try { Class clazz = Class.forName(className); - Method m = clazz.getMethod("isArchivable", Dataset.class, SettingsWrapper.class); Object[] params = { dataset, settingsWrapper }; return (Boolean) m.invoke(null, params); From 4c0fce0dc7ee63e246b80097232608bf72ff3f28 Mon Sep 17 00:00:00 2001 From: roberttreacy Date: Wed, 16 Mar 2022 18:21:59 -0400 Subject: [PATCH 021/608] rename getQueryParametersForUrl to handleRequest remove some experimental code --- .../dataverse/externaltools/ExternalToolHandler.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index ff616d08a4f..84d5b75e34c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -115,12 +115,12 @@ public String getLocaleCode() { } // TODO: rename to handleRequest() to someday handle sending headers as well as query parameters. - public String getQueryParametersForUrl() { - return getQueryParametersForUrl(false); + public String handleRequest() { + return handleRequest(false); } // TODO: rename to handleRequest() to someday handle sending headers as well as query parameters. - public String getQueryParametersForUrl(boolean preview) { + public String handleRequest(boolean preview) { requestMethod = requestMethod(); if (requestMethod().equals(HttpMethod.POST)){ try { @@ -335,12 +335,12 @@ public String requestMethod(){ return HttpMethod.GET; } public String getToolUrlWithQueryParams() { - String params = getQueryParametersForUrl(); + String params = ExternalToolHandler.this.handleRequest(); return toolContext + params; } public String getToolUrlForPreviewMode() { - return externalTool.getToolUrl() + getQueryParametersForUrl(true); + return externalTool.getToolUrl() + handleRequest(true); } public ExternalTool getExternalTool() { From 36fb9854d8f8a731092d7db9313fa91f5709b20e Mon Sep 17 00:00:00 2001 From: roberttreacy Date: Wed, 16 Mar 2022 18:22:44 -0400 Subject: [PATCH 022/608] rename getQueryParametersForUrl to handleRequest --- .../externaltools/ExternalToolHandlerTest.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java index c900c7e2523..8e70934b4ad 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java @@ -111,7 +111,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { ApiToken apiToken = new ApiToken(); apiToken.setTokenString("7196b5ce-f200-4286-8809-03ffdbc255d7"); ExternalToolHandler externalToolHandler3 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, nullLocaleCode); - String result3 = externalToolHandler3.getQueryParametersForUrl(); + String result3 = externalToolHandler3.handleRequest(); System.out.println("result3: " + result3); assertEquals("?key1=42&key2=7196b5ce-f200-4286-8809-03ffdbc255d7", result3); @@ -131,7 +131,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { ) .build().toString()); ExternalToolHandler externalToolHandler6 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, nullLocaleCode); - String result6 = 
externalToolHandler6.getQueryParametersForUrl(); + String result6 = externalToolHandler6.handleRequest(); System.out.println("result6: " + result6); assertEquals("?key1=42&key2=7196b5ce-f200-4286-8809-03ffdbc255d7&key3=2", result6); @@ -147,7 +147,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { ) .build().toString()); ExternalToolHandler externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, nullApiToken, fmd, nullLocaleCode); - String result4 = externalToolHandler4.getQueryParametersForUrl(); + String result4 = externalToolHandler4.handleRequest(); System.out.println("result4: " + result4); assertEquals("?key1=42", result4); @@ -169,7 +169,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { ) .build().toString()); ExternalToolHandler externalToolHandler7 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, "en"); - String result7 = externalToolHandler7.getQueryParametersForUrl(); + String result7 = externalToolHandler7.handleRequest(); System.out.println("result7: " + result7); assertEquals("?key1=42&key2=7196b5ce-f200-4286-8809-03ffdbc255d7&key3=2&key4=en", result7); @@ -187,7 +187,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { Exception expectedException = null; try { ExternalToolHandler externalToolHandler5 = new ExternalToolHandler(externalTool, dataFile, nullApiToken, fmd, nullLocaleCode); - String result5 = externalToolHandler5.getQueryParametersForUrl(); + String result5 = externalToolHandler5.handleRequest(); System.out.println("result5: " + result5); } catch (Exception ex) { System.out.println("Exception caught: " + ex); From b90216f28491634029a37490966f1b97f59d0cdb Mon Sep 17 00:00:00 2001 From: Robert Treacy Date: Wed, 16 Mar 2022 18:36:40 -0400 Subject: [PATCH 023/608] add UrlSignerUtil.java --- .../iq/dataverse/util/UrlSignerUtil.java | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java new file mode 100644 index 00000000000..1da1797a8ae --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -0,0 +1,150 @@ +package edu.harvard.iq.dataverse.util; + +import java.net.URL; +import java.nio.charset.Charset; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.http.NameValuePair; +import org.apache.http.client.utils.URLEncodedUtils; +import org.joda.time.LocalDateTime; + +/** + * Simple class to sign/validate URLs. + * + */ +public class UrlSignerUtil { + + private static final Logger logger = Logger.getLogger(UrlSignerUtil.class.getName()); + + /** + * + * @param baseUrl - the URL to sign - cannot contain query params + * "until","user", "method", or "token" + * @param timeout - how many minutes to make the URL valid for (note - time skew + * between the creator and receiver could affect the validation + * @param user - a string representing the user - should be understood by the + * creator/receiver + * @param method - one of the HTTP methods + * @param key - a secret key shared by the creator/receiver. 
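 * A usage sketch pairing this method with isValidUrl below (all values
 * hypothetical):
 *
 *   String secret = "shared-secret"; // e.g. an API token
 *   String signed = UrlSignerUtil.signUrl(
 *       "https://demo.example.edu/api/datasets/42", 10, "dpcreator", "GET", secret);
 *   // later, when a request for that URL arrives:
 *   boolean ok = UrlSignerUtil.isValidUrl(signed, "GET", "dpcreator", secret);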
In Dataverse + * this could be an APIKey (when sending URL to a tool that will + * use it to retrieve info from Dataverse) + * @return - the signed URL + */ + public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { + StringBuilder signedUrl = new StringBuilder(baseUrl); + + boolean firstParam = true; + if (baseUrl.contains("?")) { + firstParam = false; + } + if (timeout != null) { + LocalDateTime validTime = LocalDateTime.now(); + validTime = validTime.plusMinutes(timeout); + validTime.toString(); + signedUrl.append(firstParam ? "?" : "&").append("until=").append(validTime); + firstParam=false; + } + if (user != null) { + signedUrl.append(firstParam ? "?" : "&").append("user=").append(user); + firstParam=false; + } + if (method != null) { + signedUrl.append(firstParam ? "?" : "&").append("method=").append(method); + } + signedUrl.append("&token="); + logger.fine("String to sign: " + signedUrl.toString() + ""); + signedUrl.append(DigestUtils.sha512Hex(signedUrl.toString() + key)); + logger.fine("Generated Signed URL: " + signedUrl.toString()); + if (logger.isLoggable(Level.FINE)) { + logger.fine( + "URL signature is " + (isValidUrl(signedUrl.toString(), method, user, key) ? "valid" : "invalid")); + } + return signedUrl.toString(); + } + + /** + * This method will only return true if the URL and parameters except the + * "token" are unchanged from the original/match the values sent to this method, + * and the "token" parameter matches what this method recalculates using the + * shared key THe method also assures that the "until" timestamp is after the + * current time. + * + * @param signedUrl - the signed URL as received from Dataverse + * @param method - an HTTP method. If provided, the method in the URL must + * match + * @param user - a string representing the user, if provided the value must + * match the one in the url + * @param key - the shared secret key to be used in validation + * @return - true if valid, false if not: e.g. the key is not the same as the + * one used to generate the "token" any part of the URL preceding the + * "token" has been altered the method doesn't match (e.g. the server + * has received a POST request and the URL only allows GET) the user + * string doesn't match (e.g. 
the server knows user A is logged in, but + * the URL is only for user B) the url has expired (was used after the + * until timestamp) + */ + public static boolean isValidUrl(String signedUrl, String method, String user, String key) { + boolean valid = true; + try { + URL url = new URL(signedUrl); + List params = URLEncodedUtils.parse(url.getQuery(), Charset.forName("UTF-8")); + String hash = null; + String dateString = null; + String allowedMethod = null; + String allowedUser = null; + for (NameValuePair nvp : params) { + if (nvp.getName().equals("token")) { + hash = nvp.getValue(); + logger.fine("Hash: " + hash); + } + if (nvp.getName().equals("until")) { + dateString = nvp.getValue(); + logger.fine("Until: " + dateString); + } + if (nvp.getName().equals("method")) { + allowedMethod = nvp.getValue(); + logger.fine("Method: " + allowedMethod); + } + if (nvp.getName().equals("user")) { + allowedUser = nvp.getValue(); + logger.fine("User: " + allowedUser); + } + } + + int index = signedUrl.indexOf("&token="); + // Assuming the token is last - doesn't have to be, but no reason for the URL + // params to be rearranged either, and this should only cause false negatives if + // it does happen + String urlToHash = signedUrl.substring(0, index + 7); + logger.fine("String to hash: " + urlToHash + ""); + String newHash = DigestUtils.sha512Hex(urlToHash + key); + logger.fine("Calculated Hash: " + newHash); + if (!hash.equals(newHash)) { + logger.fine("Hash doesn't match"); + valid = false; + } + if (dateString != null && LocalDateTime.parse(dateString).isBefore(LocalDateTime.now())) { + logger.fine("Url is expired"); + valid = false; + } + if (method != null && !method.equals(allowedMethod)) { + logger.fine("Method doesn't match"); + valid = false; + } + if (user != null && !user.equals(allowedUser)) { + logger.fine("User doesn't match"); + valid = false; + } + } catch (Throwable t) { + // Want to catch anything like null pointers, etc. to force valid=false upon any + // error + logger.warning("Bad URL: " + signedUrl + " : " + t.getMessage()); + valid = false; + } + return valid; + } + +} \ No newline at end of file From 29c95995106e6f6716d7e3f2f4564646eeb680a5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 18 Mar 2022 09:58:22 -0400 Subject: [PATCH 024/608] add method to get URL (same signature as Dataset in 3b - inherit?) 
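A sketch of the result, with invented values: for a collection whose alias is
"physics" on a server whose site URL is https://demo.example.edu,
getLocalURL() returns https://demo.example.edu/dataverse/physics
(SystemConfig.getDataverseSiteUrlStatic() plus "/dataverse/" plus the alias).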
--- src/main/java/edu/harvard/iq/dataverse/Dataverse.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index 342aaec187a..db5f9d172cd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -5,6 +5,8 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; + import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; @@ -765,4 +767,8 @@ public boolean isAncestorOf( DvObject other ) { } return false; } + + public String getLocalURL() { + return SystemConfig.getDataverseSiteUrlStatic() + "/dataverse/" + this.getAlias(); + } } From 12e74d90742e93ff66225ab87afd54fbc8fee95c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 18 Mar 2022 09:58:46 -0400 Subject: [PATCH 025/608] change to PID URL, add owner info --- .../iq/dataverse/util/bagit/OREMap.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 38a04b36314..637ff2ccfff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetFieldType; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.branding.BrandingUtil; @@ -85,7 +86,7 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except localContext.putIfAbsent(JsonLDNamespace.schema.getPrefix(), JsonLDNamespace.schema.getUrl()); Dataset dataset = version.getDataset(); - String id = dataset.getGlobalId().asString(); + String id = dataset.getGlobalId().toURL().toExternalForm(); JsonArrayBuilder fileArray = Json.createArrayBuilder(); // The map describes an aggregation JsonObjectBuilder aggBuilder = Json.createObjectBuilder(); @@ -213,7 +214,9 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except } aggBuilder.add(JsonLDTerm.schemaOrg("includedInDataCatalog").getLabel(), - BrandingUtil.getRootDataverseCollectionName()); + BrandingUtil.getInstallationBrandName()); + + aggBuilder.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(dataset.getOwner())); // The aggregation aggregates aggregatedresources (Datafiles) which each have // their own entry and metadata @@ -315,6 +318,17 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except } } + private JsonObjectBuilder getDataverseDescription(Dataverse dv) { + //Schema.org is already in local context, no updates needed as long as we only use chemaOrg and "@id" here + JsonObjectBuilder dvjob = Json.createObjectBuilder().add(JsonLDTerm.schemaOrg("name").getLabel(), dv.getCurrentName()).add("@id", dv.getLocalURL()); + addIfNotNull(dvjob, JsonLDTerm.schemaOrg("description"), dv.getDescription()); + Dataverse owner = dv.getOwner(); + if(owner!=null) { + dvjob.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(owner)); + } + return 
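/* For a hypothetical dataset owned by collection "physics" under the root
 * collection, the recursive isPartOf chain built here serializes along these
 * lines (names, URLs, and descriptions invented):
 *
 *   "isPartOf": {
 *     "name": "Physics Collection",
 *     "@id": "https://demo.example.edu/dataverse/physics",
 *     "description": "Physics datasets",
 *     "isPartOf": {
 *       "name": "Demo Dataverse",
 *       "@id": "https://demo.example.edu/dataverse/root"
 *     }
 *   }
 */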
dvjob; + } + /* * Simple methods to only add an entry to JSON if the value of the term is * non-null. Methods created for string, JsonValue, boolean, and long From a53f94f0d9616e15a4608b150aea7159f7b361c0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sun, 3 Apr 2022 17:07:27 -0400 Subject: [PATCH 026/608] update to use DRS defined bucket param --- .../dataverse/engine/command/impl/S3SubmitToArchiveCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 3009e422037..391a2f7c94a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -65,7 +65,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.fine("Profile: " + profileName + " Config: " + configObject); try { configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG)); - bucketName = configObject.getString("bucket-name", null); + bucketName = configObject.getString("s3_bucket_name", null); } catch (Exception e) { logger.warning("Unable to parse " + S3_CONFIG + " setting as a Json object"); } From fcc78a5d9d774e60442bc0128188eb90664c2020 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sun, 3 Apr 2022 17:08:17 -0400 Subject: [PATCH 027/608] call ingest endpoint --- .../impl/DRSSubmitToArchiveCommand.java | 107 +++++++++++------- 1 file changed, 67 insertions(+), 40 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index cb88f9e030e..32e95ed3c3a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -13,9 +13,10 @@ import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; -import java.io.ByteArrayInputStream; import java.io.IOException; -import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -26,14 +27,19 @@ import javax.json.JsonObjectBuilder; import javax.json.JsonValue; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectRequest; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; @RequiredPermissions(Permission.PublishDataset) public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); private static final String DRS_CONFIG = ":DRSArchivalConfig"; + private static String PENDING = "Pending"; public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); @@ -55,71 +61,92 @@ public WorkflowStepResult 
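/* The ingest request assembled in this method starts from the
 * :DRSArchivalConfig JSON, drops its "collections" and "DRSendpoint" keys,
 * merges in the matched collection's entries, and adds the package
 * identifiers. For the hypothetical dataset used in earlier notes, the body
 * POSTed to the DRS endpoint would look roughly like:
 *
 *   { "package_id": "doi-10-5072-fk2-ab12cd.v1.0",
 *     "s3_path": "doi-10-5072-fk2-ab12cd",
 *     ...remaining top-level config keys and the matched collection's
 *        settings, passed through unchanged... }
 */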
performArchiveSubmission(DatasetVersion dv, ApiToken t Dataset dataset = dv.getDataset(); Dataverse ancestor = dataset.getOwner(); String alias = getArchivableAncestor(ancestor, collections); - + if (alias != null) { JsonObject collectionConfig = drsConfigObject.getJsonObject("collections").getJsonObject(alias); WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); + JsonObjectBuilder statusObject = Json.createObjectBuilder(); + statusObject.add("status", "Failure"); + statusObject.add("message", "Bag not transferred"); + if (s3Result == WorkflowStepResult.OK) { + statusObject.add("status", "Attempted"); + statusObject.add("message", "Bag transferred"); + // Now contact DRS JsonObjectBuilder job = Json.createObjectBuilder(drsConfigObject); job.remove("collections"); + job.remove("DRSendpoint"); + String spaceName = getSpaceName(dataset); + job.add("package_id", spaceName + ".v" + dv.getFriendlyVersionNumber()); + + job.add("s3_path", spaceName); for (Entry entry : collectionConfig.entrySet()) { job.add(entry.getKey(), entry.getValue()); } String drsConfigString = JsonUtil.prettyPrint(job.build()); - try (ByteArrayInputStream configIn = new ByteArrayInputStream(drsConfigString.getBytes("UTF-8"))) { - // Add datacite.xml file - ObjectMetadata om = new ObjectMetadata(); - om.setContentLength(configIn.available()); - String dcKey = getSpaceName(dataset) + "/drsConfig." + getSpaceName(dataset) + "_v" - + dv.getFriendlyVersionNumber() + ".json"; - tm.upload(new PutObjectRequest(bucketName, dcKey, configIn, om)).waitForCompletion(); - om = s3.getObjectMetadata(bucketName, dcKey); - } catch (RuntimeException rte) { - logger.warning("Error creating DRS Config file during DRS archiving: " + rte.getMessage()); - rte.printStackTrace(); - return new Failure("Error in generating Config file", - "DRS Submission Failure: config file not created"); - } catch (InterruptedException e) { - logger.warning("DRS Archiver failure: " + e.getLocalizedMessage()); - e.printStackTrace(); - return new Failure("DRS Archiver fail in config transfer"); - } catch (UnsupportedEncodingException e1) { - logger.warning("UTF-8 not supported!"); - } catch (IOException e1) { - logger.warning("Failure creating ByteArrayInputStream from string!"); - } - - logger.fine("DRS Submission step: Config Transferred"); - // Document the location of dataset archival copy location (actually the URL - // where you can - // view it as an admin) + CloseableHttpClient client = HttpClients.createDefault(); + HttpPost ingestPost; + try { + ingestPost = new HttpPost(); + ingestPost.setURI(new URI(drsConfigObject.getString("DRSendpoint"))); + String body = drsConfigString; + logger.fine("Body: " + body); + ingestPost.setEntity(new StringEntity(body, "utf-8")); + ingestPost.setHeader("Content-Type", "application/json"); + + } catch (URISyntaxException e) { + return new Failure( + "DRS Submission workflow step failed: unable to parse the DRSendpoint in the " + DRS_CONFIG + " setting as a URI."); + } + // execute + + try (CloseableHttpResponse response = client.execute(ingestPost)) { + int code = response.getStatusLine().getStatusCode(); + String responseBody = new String(response.getEntity().getContent().readAllBytes(), + StandardCharsets.UTF_8); + if (code >= 200 && code < 300) { + logger.fine("Status: " + code); + logger.fine("Response" + responseBody); + JsonObject responseObject = JsonUtil.getJsonObject(responseBody); + String status = responseObject.getString("status"); + if (!PENDING.equals(status)) {
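+ // The DRS /ingest call is asynchronous: a 2xx response is expected to carry the "Pending" status, so anything else is unexpected and only logged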
logger.warning("Unexpected Status: " + status); + } else { + logger.fine("DRS Ingest succeded: " + responseObject.toString()); + statusObject.add("status", status); + statusObject.add("message", responseObject.getString("message")); + } + } + } catch (ClientProtocolException e2) { + e2.printStackTrace(); + } catch (IOException e2) { + e2.printStackTrace(); + } - // Unsigned URL - gives location but not access without creds } else { - logger.warning("DRS: S3 archiving failed - will not send config: " + getSpaceName(dataset) + "_v" + logger.warning("DRS: S3 archiving failed - will not call ingest: " + getSpaceName(dataset) + "_v" + dv.getFriendlyVersionNumber()); return new Failure("DRS Archiver fail in initial S3 Archiver transfer"); } - + dv.setArchivalCopyLocation(statusObject.build().toString()); } else { logger.fine("DRS Archiver: No matching collection found - will not archive: " + getSpaceName(dataset) + "_v" + dv.getFriendlyVersionNumber()); return WorkflowStepResult.OK; } - } else { logger.warning(DRS_CONFIG + " not found"); return new Failure("DRS Submission not configured - no " + DRS_CONFIG + " found."); } return WorkflowStepResult.OK; } - + private static String getArchivableAncestor(Dataverse ancestor, Set collections) { String alias = ancestor.getAlias(); while (ancestor != null && !collections.contains(alias)) { @@ -138,15 +165,15 @@ public static boolean isArchivable(Dataset d, SettingsWrapper sw) { try { String config = sw.get(DRS_CONFIG, null); - if(config!=null) { - drsConfigObject = JsonUtil.getJsonObject(config); + if (config != null) { + drsConfigObject = JsonUtil.getJsonObject(config); } } catch (Exception e) { logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); } if (drsConfigObject != null) { Set collections = drsConfigObject.getJsonObject("collections").keySet(); - return getArchivableAncestor(d.getOwner(),collections)!=null; + return getArchivableAncestor(d.getOwner(), collections) != null; } return false; } From 8bd83cd00c97a839356316d5eb0fe8b98cac8be5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sun, 3 Apr 2022 19:09:27 -0400 Subject: [PATCH 028/608] reminder --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 32e95ed3c3a..101d31f4cc7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -89,6 +89,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String drsConfigString = JsonUtil.prettyPrint(job.build()); + + //TODO - ADD code to ignore self-signed cert CloseableHttpClient client = HttpClients.createDefault(); HttpPost ingestPost; try { From aee2ebb8a818404d129a679c38e52ec5a798952d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Apr 2022 09:27:12 -0400 Subject: [PATCH 029/608] update Json format to match /ingest, add flag to trust cert --- .../impl/DRSSubmitToArchiveCommand.java | 99 +++++++++++++++---- 1 file changed, 79 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 101d31f4cc7..fba236457a6 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -17,6 +17,9 @@ import java.net.URI; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -26,20 +29,31 @@ import javax.json.JsonObject; import javax.json.JsonObjectBuilder; import javax.json.JsonValue; +import javax.net.ssl.SSLContext; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpPost; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; +import org.apache.http.ssl.SSLContextBuilder; @RequiredPermissions(Permission.PublishDataset) public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); private static final String DRS_CONFIG = ":DRSArchivalConfig"; - private static String PENDING = "Pending"; + private static final String FAILURE = "failure"; + private static final String PENDING = "pending"; + private static final String ADMIN_METADATA = "admin_metadata"; + private static final String S3_BUCKET_NAME = "s3_bucket_name"; + private static final String COLLECTIONS = "collections"; + private static final String PACKAGE_ID = "package_id"; + private static final String TRUST_CERT = "trust_cert"; public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); @@ -57,13 +71,15 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); } if (drsConfigObject != null) { - Set collections = drsConfigObject.getJsonObject("collections").keySet(); + Set collections = drsConfigObject.getJsonObject(COLLECTIONS).keySet(); Dataset dataset = dv.getDataset(); Dataverse ancestor = dataset.getOwner(); String alias = getArchivableAncestor(ancestor, collections); + String spaceName = getSpaceName(dataset); + String packageId = spaceName + ".v" + dv.getFriendlyVersionNumber(); if (alias != null) { - JsonObject collectionConfig = drsConfigObject.getJsonObject("collections").getJsonObject(alias); + JsonObject collectionConfig = drsConfigObject.getJsonObject(COLLECTIONS).getJsonObject(alias); WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); @@ -77,21 +93,56 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Now contact DRS JsonObjectBuilder job = Json.createObjectBuilder(drsConfigObject); - job.remove("collections"); + JsonObjectBuilder amob = Json.createObjectBuilder(); + if (drsConfigObject.containsKey(ADMIN_METADATA)) { + amob = Json.createObjectBuilder(drsConfigObject.getJsonObject(ADMIN_METADATA)); + } + + boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); + job.remove(TRUST_CERT); + job.remove(COLLECTIONS); + job.remove(ADMIN_METADATA); 
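+ // DRSendpoint is likewise Dataverse-side configuration and must not be forwarded in the body sent to the DRS /ingest API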
job.remove("DRSendpoint"); - String spaceName = getSpaceName(dataset); - job.add("package_id", spaceName + ".v" + dv.getFriendlyVersionNumber()); + job.add(PACKAGE_ID, packageId); job.add("s3_path", spaceName); + if (collectionConfig.containsKey(S3_BUCKET_NAME)) { + job.add(S3_BUCKET_NAME, collectionConfig.get(S3_BUCKET_NAME)); + } + for (Entry entry : collectionConfig.entrySet()) { - job.add(entry.getKey(), entry.getValue()); + if (!entry.getKey().equals(S3_BUCKET_NAME)) { + amob.add(entry.getKey(), entry.getValue()); + } } + job.add(ADMIN_METADATA, amob); String drsConfigString = JsonUtil.prettyPrint(job.build()); - - //TODO - ADD code to ignore self-signed cert - CloseableHttpClient client = HttpClients.createDefault(); + // TODO - ADD code to ignore self-signed cert + CloseableHttpClient client = null; + if (trustCert) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + try { + SSLContext sslContext = SSLContextBuilder + .create() + .loadTrustMaterial(new TrustAllStrategy()) + .build(); + client = HttpClients.custom().setSSLContext(sslContext).setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE).build(); + } catch (KeyManagementException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (NoSuchAlgorithmException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (KeyStoreException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + if(client == null) { + client = HttpClients.createDefault(); + } HttpPost ingestPost; try { ingestPost = new HttpPost(); @@ -111,18 +162,28 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t int code = response.getStatusLine().getStatusCode(); String responseBody = new String(response.getEntity().getContent().readAllBytes(), StandardCharsets.UTF_8); - if (code >= 200 && code < 300) { + if (code == 202) { logger.fine("Status: " + code); logger.fine("Response" + responseBody); JsonObject responseObject = JsonUtil.getJsonObject(responseBody); String status = responseObject.getString("status"); - if (!PENDING.equals(status)) { - logger.warning("Unexpected Status: " + status); - } else { - logger.fine("DRS Ingest succeded: " + responseObject.toString()); + switch (status) { + case PENDING: + logger.fine("DRS Ingest successfully started for: " + packageId + " : " + + responseObject.toString()); statusObject.add("status", status); statusObject.add("message", responseObject.getString("message")); + break; + case FAILURE: + logger.severe( + "DRS Ingest Failed for: " + packageId + " : " + responseObject.toString()); + return new Failure("DRS Archiver fail in Ingest call"); + default: + logger.warning("Unexpected Status: " + status); } + } else { + logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); + return new Failure("DRS Archiver fail in Ingest call with status cvode: " + code); } } catch (ClientProtocolException e2) { e2.printStackTrace(); @@ -132,14 +193,12 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } else { - logger.warning("DRS: S3 archiving failed - will not call ingest: " + getSpaceName(dataset) + "_v" - + dv.getFriendlyVersionNumber()); + logger.warning("DRS: S3 archiving failed - will not call ingest: " + packageId); return new Failure("DRS Archiver fail in initial S3 Archiver transfer"); } dv.setArchivalCopyLocation(statusObject.build().toString()); } else { - logger.fine("DRS Archiver: No matching collection found - will not archive: " + 
getSpaceName(dataset) - + "_v" + dv.getFriendlyVersionNumber()); + logger.fine("DRS Archiver: No matching collection found - will not archive: " + packageId); return WorkflowStepResult.OK; } } else { @@ -174,7 +233,7 @@ public static boolean isArchivable(Dataset d, SettingsWrapper sw) { logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); } if (drsConfigObject != null) { - Set collections = drsConfigObject.getJsonObject("collections").keySet(); + Set collections = drsConfigObject.getJsonObject(COLLECTIONS).keySet(); return getArchivableAncestor(d.getOwner(), collections) != null; } return false; From 44a52bdd46e32ee7f5d3732c26682a03b987b36f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Apr 2022 09:28:19 -0400 Subject: [PATCH 030/608] cleanup --- .../command/impl/DRSSubmitToArchiveCommand.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index fba236457a6..d8876b57405 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -35,7 +35,6 @@ import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpPost; import org.apache.http.conn.ssl.NoopHostnameVerifier; -import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.TrustAllStrategy; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; @@ -124,11 +123,10 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t if (trustCert) { // use the TrustSelfSignedStrategy to allow Self Signed Certificates try { - SSLContext sslContext = SSLContextBuilder - .create() - .loadTrustMaterial(new TrustAllStrategy()) + SSLContext sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()) .build(); - client = HttpClients.custom().setSSLContext(sslContext).setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE).build(); + client = HttpClients.custom().setSSLContext(sslContext) + .setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE).build(); } catch (KeyManagementException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -139,8 +137,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // TODO Auto-generated catch block e.printStackTrace(); } - } - if(client == null) { + } + if (client == null) { client = HttpClients.createDefault(); } HttpPost ingestPost; From 48835b232dd212c7685a79e622a4dfb9329f4f50 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Apr 2022 09:50:03 -0400 Subject: [PATCH 031/608] align json config structure with api --- .../impl/DRSSubmitToArchiveCommand.java | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index d8876b57405..cd7bc672ef0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -50,6 +50,7 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand 
implemen private static final String PENDING = "pending"; private static final String ADMIN_METADATA = "admin_metadata"; private static final String S3_BUCKET_NAME = "s3_bucket_name"; + private static final String S3_PATH = "s3_path"; private static final String COLLECTIONS = "collections"; private static final String PACKAGE_ID = "package_id"; private static final String TRUST_CERT = "trust_cert"; @@ -70,7 +71,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); } if (drsConfigObject != null) { - Set collections = drsConfigObject.getJsonObject(COLLECTIONS).keySet(); + JsonObject adminMetadata = drsConfigObject.getJsonObject(ADMIN_METADATA); + Set collections = adminMetadata.getJsonObject(COLLECTIONS).keySet(); Dataset dataset = dv.getDataset(); Dataverse ancestor = dataset.getOwner(); String alias = getArchivableAncestor(ancestor, collections); @@ -78,7 +80,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String packageId = spaceName + ".v" + dv.getFriendlyVersionNumber(); if (alias != null) { - JsonObject collectionConfig = drsConfigObject.getJsonObject(COLLECTIONS).getJsonObject(alias); + JsonObject collectionConfig = adminMetadata.getJsonObject(COLLECTIONS).getJsonObject(alias); WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); @@ -91,20 +93,20 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t statusObject.add("message", "Bag transferred"); // Now contact DRS - JsonObjectBuilder job = Json.createObjectBuilder(drsConfigObject); - JsonObjectBuilder amob = Json.createObjectBuilder(); - if (drsConfigObject.containsKey(ADMIN_METADATA)) { - amob = Json.createObjectBuilder(drsConfigObject.getJsonObject(ADMIN_METADATA)); - } - boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); - job.remove(TRUST_CERT); - job.remove(COLLECTIONS); - job.remove(ADMIN_METADATA); - job.remove("DRSendpoint"); + + JsonObjectBuilder job = Json.createObjectBuilder(); + + job.add(S3_BUCKET_NAME, adminMetadata.getString(S3_BUCKET_NAME)); + job.add(PACKAGE_ID, packageId); + job.add(S3_PATH, spaceName); - job.add("s3_path", spaceName); + //We start with the default admin_metadata + JsonObjectBuilder amob = Json.createObjectBuilder(adminMetadata); + //Remove collections and then override any params for the given alias + amob.remove(COLLECTIONS); + //Allow override of bucket name if (collectionConfig.containsKey(S3_BUCKET_NAME)) { job.add(S3_BUCKET_NAME, collectionConfig.get(S3_BUCKET_NAME)); } From f36791060fbd5b2293d9ae3c6c629c5679f3667f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Apr 2022 10:24:25 -0400 Subject: [PATCH 032/608] update isArchivable logic to match json change --- .../impl/DRSSubmitToArchiveCommand.java | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index cd7bc672ef0..aa98fe957b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -94,19 +94,19 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Now contact DRS boolean trustCert = 
drsConfigObject.getBoolean(TRUST_CERT, false); - + JsonObjectBuilder job = Json.createObjectBuilder(); - + job.add(S3_BUCKET_NAME, adminMetadata.getString(S3_BUCKET_NAME)); job.add(PACKAGE_ID, packageId); job.add(S3_PATH, spaceName); - //We start with the default admin_metadata + // We start with the default admin_metadata JsonObjectBuilder amob = Json.createObjectBuilder(adminMetadata); - //Remove collections and then override any params for the given alias + // Remove collections and then override any params for the given alias amob.remove(COLLECTIONS); - //Allow override of bucket name + // Allow override of bucket name if (collectionConfig.containsKey(S3_BUCKET_NAME)) { job.add(S3_BUCKET_NAME, collectionConfig.get(S3_BUCKET_NAME)); } @@ -233,8 +233,14 @@ public static boolean isArchivable(Dataset d, SettingsWrapper sw) { logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); } if (drsConfigObject != null) { - Set collections = drsConfigObject.getJsonObject(COLLECTIONS).keySet(); - return getArchivableAncestor(d.getOwner(), collections) != null; + JsonObject adminMetadata = drsConfigObject.getJsonObject(ADMIN_METADATA); + if (adminMetadata != null) { + JsonObject collectionObj = adminMetadata.getJsonObject(COLLECTIONS); + if (collectionObj != null) { + Set collections = collectionObj.keySet(); + return getArchivableAncestor(d.getOwner(), collections) != null; + } + } } return false; } From b9eb8fe65bfd26847abc453a6b2db09394fe4778 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Apr 2022 12:25:45 -0400 Subject: [PATCH 033/608] change log level to use on a Payara 5.2021.5 machine --- .../command/impl/DRSSubmitToArchiveCommand.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index aa98fe957b8..ecebf13bd8e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -62,7 +62,7 @@ public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion versi @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { - logger.fine("In DRSSubmitToArchiveCommand..."); + logger.info("In DRSSubmitToArchiveCommand..."); JsonObject drsConfigObject = null; try { @@ -148,7 +148,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t ingestPost = new HttpPost(); ingestPost.setURI(new URI(drsConfigObject.getString("DRSendpoint"))); String body = drsConfigString; - logger.fine("Body: " + body); + logger.info("Body: " + body); ingestPost.setEntity(new StringEntity(body, "utf-8")); ingestPost.setHeader("Content-Type", "application/json"); @@ -163,13 +163,13 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String responseBody = new String(response.getEntity().getContent().readAllBytes(), StandardCharsets.UTF_8); if (code == 202) { - logger.fine("Status: " + code); - logger.fine("Response" + responseBody); + logger.info("Status: " + code); + logger.info("Response" + responseBody); JsonObject responseObject = JsonUtil.getJsonObject(responseBody); String status = responseObject.getString("status"); switch (status) { case PENDING: - logger.fine("DRS Ingest successfully started for: " + packageId + " : " + 
logger.info("DRS Ingest successfully started for: " + packageId + " : " + responseObject.toString()); statusObject.add("status", status); statusObject.add("message", responseObject.getString("message")); @@ -198,7 +198,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } dv.setArchivalCopyLocation(statusObject.build().toString()); } else { - logger.fine("DRS Archiver: No matching collection found - will not archive: " + packageId); + logger.info("DRS Archiver: No matching collection found - will not archive: " + packageId); return WorkflowStepResult.OK; } } else { From f77b536da8549d1513a7ec4edd929953c7924e0a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 7 Apr 2022 15:00:37 -0400 Subject: [PATCH 034/608] first draft of status API --- .../edu/harvard/iq/dataverse/api/Admin.java | 103 ++++++++++++++++-- .../impl/AbstractSubmitToArchiveCommand.java | 9 ++ 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 719b4aeb1ba..d90a99aa674 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -44,6 +44,7 @@ import javax.json.Json; import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; +import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; @@ -51,6 +52,7 @@ import javax.ws.rs.PUT; import javax.ws.rs.Path; import javax.ws.rs.PathParam; +import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @@ -99,6 +101,8 @@ import java.io.IOException; import java.io.OutputStream; import edu.harvard.iq.dataverse.util.json.JsonPrinter; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.rolesToJson; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; @@ -1728,31 +1732,37 @@ public Response validateDataFileHashValue(@PathParam("fileId") String fileId) { } - @GET - @Path("/submitDataVersionToArchive/{id}/{version}") - public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, @PathParam("version") String versionNumber) { + @POST + @Path("/submitDatasetVersionToArchive/{id}/{version}") + public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, + @PathParam("version") String versionNumber) { try { AuthenticatedUser au = findAuthenticatedUserOrDie(); - // Note - the user is being set in the session so it becomes part of the - // DataverseRequest and is sent to the back-end command where it is used to get - // the API Token which is then used to retrieve files (e.g. via S3 direct - // downloads) to create the Bag + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + // Note - the user is being set in the session so it becomes part of the + // DataverseRequest and is sent to the back-end command where it is used to get + // the API Token which is then used to retrieve files (e.g. via S3 direct + // downloads) to create the Bag session.setUser(au); // TODO: Stop using session. Use createDataverseRequest instead. 
Dataset ds = findDatasetOrDie(dsid); DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); if (dv.getArchivalCopyLocation() == null) { String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); - AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, + dvRequestService.getDataverseRequest(), dv); if (cmd != null) { new Thread(new Runnable() { public void run() { try { DatasetVersion dv = commandEngine.submit(cmd); if (dv.getArchivalCopyLocation() != null) { - logger.info("DatasetVersion id=" + ds.getGlobalId().toString() + " v" + versionNumber + " submitted to Archive at: " - + dv.getArchivalCopyLocation()); + logger.info( + "DatasetVersion id=" + ds.getGlobalId().toString() + " v" + versionNumber + + " submitted to Archive at: " + dv.getArchivalCopyLocation()); } else { logger.severe("Error submitting version due to conflict/error at Archive"); } @@ -1761,7 +1771,8 @@ public void run() { } } }).start(); - return ok("Archive submission using " + cmd.getClass().getCanonicalName() + " started. Processing can take significant time for large datasets. View log and/or check archive for results."); + return ok("Archive submission using " + cmd.getClass().getCanonicalName() + + " started. Processing can take significant time for large datasets. View log and/or check archive for results."); } else { logger.log(Level.SEVERE, "Could not find Archiver class: " + className); return error(Status.INTERNAL_SERVER_ERROR, "Could not find Archiver class: " + className); @@ -1774,6 +1785,74 @@ public void run() { } } + @GET + @Produces(MediaType.APPLICATION_JSON) + @Path("/submitDataVersionToArchive/{id}/{version}/status") + public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, + @PathParam("version") String versionNumber) { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + Dataset ds = findDatasetOrDie(dsid); + + DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); + if (dv.getArchivalCopyLocation() == null) { + return error(Status.NO_CONTENT, "This dataset version has not been archived"); + } else { + JsonObject status = JsonUtil.getJsonObject(dv.getArchivalCopyLocation()); + return ok(status); + } + } catch (WrappedResponse e1) { + return error(Status.UNAUTHORIZED, "api key required"); + } + } + + @PUT + @Consumes(MediaType.APPLICATION_JSON) + @Path("/submitDataVersionToArchive/{id}/{version}/status") + public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, + @PathParam("version") String versionNumber, JsonObject update) { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + } catch (WrappedResponse e1) { + return error(Status.UNAUTHORIZED, "api key required"); + } + if (update.containsKey(AbstractSubmitToArchiveCommand.STATUS) + && update.containsKey(AbstractSubmitToArchiveCommand.MESSAGE)) { + String status = update.getString(AbstractSubmitToArchiveCommand.STATUS); + if (status.equals(AbstractSubmitToArchiveCommand.PENDING) + || status.equals(AbstractSubmitToArchiveCommand.FAILURE) + || status.equals(AbstractSubmitToArchiveCommand.SUCCESS)) { + + 
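+ // Only the statuses the archiving workflow itself writes are accepted; an external archiver might PUT a body such as (hypothetical values): {"status": "success", "message": "Bag stored in DRS"}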
try { + Dataset ds; + + ds = findDatasetOrDie(dsid); + + DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); + if(dv==null) { + return error(Status.NOT_FOUND, "Dataset version not found"); + } + dv.setArchivalCopyLocation(JsonUtil.prettyPrint(update)); + + } catch (WrappedResponse e) { + return error(Status.NOT_FOUND, "Dataset not found"); + } + } + } + return error(Status.BAD_REQUEST, "Unacceptable status format"); + } + + + /** * Iteratively archives all unarchived dataset versions @@ -1783,7 +1862,7 @@ public void run() { * lastestonly - only archive the latest versions * @return */ - @GET + @POST @Path("/archiveAllUnarchivedDatasetVersions") public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") boolean listonly, @QueryParam("limit") Integer limit, @QueryParam("latestonly") boolean latestonly) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index e919f81e6e9..dad17df38c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -27,6 +27,15 @@ public abstract class AbstractSubmitToArchiveCommand extends AbstractCommand requestedSettings = new HashMap(); private static final Logger logger = Logger.getLogger(AbstractSubmitToArchiveCommand.class.getName()); + //Status message required components + public static final String STATUS = "status"; + public static final String MESSAGE = "message"; + //Allowed Statuses + public static final String PENDING = "pending"; + public static final String SUCCESS = "success"; + public static final String FAILURE = "failure"; + + public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version.getDataset()); this.version = version; From 4a86f48347e7e8ff4496f7ed34f4a4c378273fb9 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 11:05:33 -0400 Subject: [PATCH 035/608] revert changes from TDL --- src/main/java/propertyFiles/Bundle.properties | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index fb788d42664..9895cffe0e7 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -145,7 +145,7 @@ contact.header=Contact {0} contact.dataverse.header=Email Dataverse Contact contact.dataset.header=Email Dataset Contact contact.to=To -contact.support=TDL Dataverse Support +contact.support=Support contact.from=From contact.from.required=User email is required. contact.from.invalid=Email is invalid. @@ -317,9 +317,9 @@ login.System=Login System login.forgot.text=Forgot your password? login.builtin=Dataverse Account login.institution=Institutional Account -login.institution.blurb=Log in or sign up with your institutional account — learn more. If you are not affiliated with a TDR member institution (see dropdown menu), please use the Google Login option. +login.institution.blurb=Log in or sign up with your institutional account — more information about account creation. login.institution.support.blurbwithLink=Leaving your institution? Please contact {0} for assistance. 
-login.builtin.credential.usernameOrEmail=Admin ID +login.builtin.credential.usernameOrEmail=Username/Email login.builtin.credential.password=Password login.builtin.invalidUsernameEmailOrPassword=The username, email address, or password you entered is invalid. Need assistance accessing your account? login.signup.blurb=Sign up for a Dataverse account. @@ -335,12 +335,12 @@ login.button.orcid=Create or Connect your ORCID # authentication providers auth.providers.title=Other options auth.providers.tip=You can convert a Dataverse account to use one of the options above. More information about account creation. -auth.providers.title.builtin=Admin ID +auth.providers.title.builtin=Username/Email auth.providers.title.shib=Your Institution auth.providers.title.orcid=ORCID -auth.providers.title.google=Google (No TDR affiliation) +auth.providers.title.google=Google auth.providers.title.github=GitHub -auth.providers.blurb=Log in or sign up with your Google account — learn more. If you are not affiliated with a TDR member institution, please use the Google Login option. Having trouble? Please contact {3} for assistance. +auth.providers.blurb=Log in or sign up with your {0} account — more information about account creation. Having trouble? Please contact {3} for assistance. auth.providers.persistentUserIdName.orcid=ORCID iD auth.providers.persistentUserIdName.github=ID auth.providers.persistentUserIdTooltip.orcid=ORCID provides a persistent digital identifier that distinguishes you from other researchers. @@ -383,7 +383,7 @@ shib.welcomeExistingUserMessageDefaultInstitution=your institution shib.dataverseUsername=Dataverse Username shib.currentDataversePassword=Current Dataverse Password shib.accountInformation=Account Information -shib.offerToCreateNewAccount=Contact your TDR liaison to get help and training. Published content cannot be easily deleted. +shib.offerToCreateNewAccount=This information is provided by your institution and will be used to create your Dataverse account. shib.passwordRejected=Validation Error - Your account can only be converted if you provide the correct password for your existing account. If your existing account has been deactivated by an administrator, you cannot convert your account. 
# oauth2/firstLogin.xhtml From d286841fe5464a7a481a20d79247499d761342b4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 14:27:00 -0400 Subject: [PATCH 036/608] add canonicalization --- pom.xml | 16 ++- .../impl/DRSSubmitToArchiveCommand.java | 123 +++++++++++++----- .../impl/DRSSubmitToArchiveCommandTest.java | 93 +++++++++++++ 3 files changed, 197 insertions(+), 35 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java diff --git a/pom.xml b/pom.xml index 995e90b0029..6d75bdd39e0 100644 --- a/pom.xml +++ b/pom.xml @@ -50,7 +50,7 @@ --> - + @@ -502,7 +502,19 @@ <artifactId>google-cloud-storage</artifactId> </dependency> + <dependency> + <groupId>com.auth0</groupId> + <artifactId>java-jwt</artifactId> + <version>3.19.1</version> + </dependency> + <dependency> + <groupId>io.github.erdtman</groupId> + <artifactId>java-json-canonicalization</artifactId> + <version>1.1</version> + </dependency> diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index ecebf13bd8e..aa4d1255477 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; @@ -17,6 +18,9 @@ import java.net.URI; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; +import java.security.KeyFactory; import java.security.KeyManagementException; import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; +import java.security.interfaces.RSAPrivateKey; +import java.security.interfaces.RSAPublicKey; +import java.security.spec.InvalidKeySpecException; +import java.security.spec.PKCS8EncodedKeySpec; +import java.time.Instant; +import java.util.Base64; +import java.util.Date; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -26,20 +29,31 @@ import javax.json.JsonObject; import javax.json.JsonObjectBuilder; import javax.json.JsonValue; import javax.net.ssl.SSLContext; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpPost; import org.apache.http.conn.ssl.NoopHostnameVerifier; import org.apache.http.conn.ssl.TrustAllStrategy; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.ssl.SSLContextBuilder; +import org.erdtman.jcs.JsonCanonicalizer; + +import com.auth0.jwt.JWT; +import com.auth0.jwt.algorithms.Algorithm; +import com.auth0.jwt.exceptions.JWTCreationException; +import com.auth0.jwt.interfaces.DecodedJWT; + @RequiredPermissions(Permission.PublishDataset) public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); private static final String DRS_CONFIG = ":DRSArchivalConfig"; - private static final String FAILURE = "failure"; private static final String PENDING = "pending"; private static final String ADMIN_METADATA = "admin_metadata"; private static final String S3_BUCKET_NAME = "s3_bucket_name"; private static final String S3_PATH = "s3_path"; private static final String COLLECTIONS = "collections"; private static final String PACKAGE_ID = "package_id"; + + private static final String RSA_KEY = "dataverse.archiver.drs.rsa_key"; + private static final String TRUST_CERT = "trust_cert"; public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); @@ -147,50 +167,79 @@ public WorkflowStepResult
performArchiveSubmission(DatasetVersion dv, ApiToken t try { ingestPost = new HttpPost(); ingestPost.setURI(new URI(drsConfigObject.getString("DRSendpoint"))); + + byte[] encoded = Base64.getDecoder().decode(System.getProperty(RSA_KEY)); + + KeyFactory keyFactory = KeyFactory.getInstance("RSA"); + PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(encoded); + RSAPrivateKey privKey = (RSAPrivateKey) keyFactory.generatePrivate(keySpec); + RSAPublicKey publicKey; + /* + * If public key is needed: encoded = Base64.decodeBase64(publicKeyPEM); + * + * KeyFactory keyFactory = KeyFactory.getInstance("RS256"); X509EncodedKeySpec + * keySpec = new X509EncodedKeySpec(encoded); return (RSAPublicKey) + * keyFactory.generatePublic(keySpec); RSAPublicKey publicKey = new + * RSAPublicKey(System.getProperty(RS256_KEY)); + */ + Algorithm algorithmRSA = Algorithm.RSA256(null, privKey); + String body = drsConfigString; + String jwtString = createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), body, 5); + logger.info("JWT: " + jwtString); + + ingestPost.setHeader("Authorization", "Bearer " + jwtString); + logger.info("Body: " + body); ingestPost.setEntity(new StringEntity(body, "utf-8")); ingestPost.setHeader("Content-Type", "application/json"); + try (CloseableHttpResponse response = client.execute(ingestPost)) { + int code = response.getStatusLine().getStatusCode(); + String responseBody = new String(response.getEntity().getContent().readAllBytes(), + StandardCharsets.UTF_8); + if (code == 202) { + logger.info("Status: " + code); + logger.info("Response: " + responseBody); + JsonObject responseObject = JsonUtil.getJsonObject(responseBody); + String status = responseObject.getString("status"); + switch (status) { + case PENDING: + logger.info("DRS Ingest successfully started for: " + packageId + " : " + + responseObject.toString()); + statusObject.add("status", status); + statusObject.add("message", responseObject.getString("message")); + break; + case FAILURE: + logger.severe( + "DRS Ingest Failed for: " + packageId + " : " + responseObject.toString()); + return new Failure("DRS Archiver fail in Ingest call"); + default: + logger.warning("Unexpected Status: " + status); + } + } else { + logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); + return new Failure("DRS Archiver fail in Ingest call with status code: " + code); + } + } catch (ClientProtocolException e2) { + e2.printStackTrace(); + } catch (IOException e2) { + e2.printStackTrace(); + } } catch (URISyntaxException e) { return new Failure( "DRS Submission workflow step failed: unable to parse the DRSendpoint in the " + DRS_CONFIG + " setting as a URI."); + } catch (JWTCreationException exception) { + // Invalid Signing configuration / Couldn't convert Claims.
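+ // Deliberately swallowed: a signing failure means a bad RSA key configuration, and in that case the ingest request is simply never sent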
} // execute - - try (CloseableHttpResponse response = client.execute(ingestPost)) { - int code = response.getStatusLine().getStatusCode(); - String responseBody = new String(response.getEntity().getContent().readAllBytes(), - StandardCharsets.UTF_8); - if (code == 202) { - logger.info("Status: " + code); - logger.info("Response" + responseBody); - JsonObject responseObject = JsonUtil.getJsonObject(responseBody); - String status = responseObject.getString("status"); - switch (status) { - case PENDING: - logger.info("DRS Ingest successfully started for: " + packageId + " : " - + responseObject.toString()); - statusObject.add("status", status); - statusObject.add("message", responseObject.getString("message")); - break; - case FAILURE: - logger.severe( - "DRS Ingest Failed for: " + packageId + " : " + responseObject.toString()); - return new Failure("DRS Archiver fail in Ingest call"); - default: - logger.warning("Unexpected Status: " + status); - } - } else { - logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); - return new Failure("DRS Archiver fail in Ingest call with status cvode: " + code); - } - } catch (ClientProtocolException e2) { - e2.printStackTrace(); - } catch (IOException e2) { - e2.printStackTrace(); + catch (InvalidKeySpecException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (NoSuchAlgorithmException e) { +// TODO Auto-generated catch block + e.printStackTrace(); } - } else { logger.warning("DRS: S3 archiving failed - will not call ingest: " + packageId); @@ -208,6 +257,14 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t return WorkflowStepResult.OK; } + public static String createJWTString(Algorithm algorithmRSA, String installationBrandName, String body, int expirationInMinutes) throws IOException { + String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); + String digest = DigestUtils.sha256Hex(canonicalBody); + return JWT.create().withIssuer(BrandingUtil.getInstallationBrandName()).withIssuedAt(Date.from(Instant.now())) + .withExpiresAt(Date.from(Instant.now().plusSeconds(60 * expirationInMinutes))) + .withKeyId("defaultDataverse").withClaim("bodySHA256Hash", digest).sign(algorithmRSA); + } + private static String getArchivableAncestor(Dataverse ancestor, Set collections) { String alias = ancestor.getAlias(); while (ancestor != null && !collections.contains(alias)) { diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java new file mode 100644 index 00000000000..bf6f4dd8a4e --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -0,0 +1,93 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.branding.BrandingUtil; + +import org.apache.commons.codec.digest.DigestUtils; +import org.junit.Assert; +import org.junit.Test; + +import com.auth0.jwt.JWT; +import com.auth0.jwt.algorithms.Algorithm; +import com.auth0.jwt.interfaces.DecodedJWT; + +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import java.security.KeyFactory; +import java.security.interfaces.RSAPrivateKey; +import java.security.interfaces.RSAPublicKey; +import java.security.spec.PKCS8EncodedKeySpec; +import java.time.Instant; +import java.util.Base64; +import java.util.Date; + +/** + * + * @author michael + */ +public class 
DRSSubmitToArchiveCommandTest { + + @Test + public void createJWT() throws CommandException { + + String privKeyString = "MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCzSwj+c/uiRz5A" + + "OiDWsV5pxJrdzlDRV2PKKwRGCzhv1MEPwQCvFp6wZRDgCE4EfpVUuByNInV1eOfr" + + "BjwIlxp8hv9RPYCAsPCFV46VLeZsr8FOfvqI6IswYqB3qwdi5NW+CuJRLgTFJP87" + + "X5GgoItVnE0/DxIuZobuaEEzPa8TV8kUvdehzxTlkMTay5J/USeyKsUjPozqgKtN" + + "4ScCWrQx2FXEuKoCg85wNgFRJHgSGBH07lNAYV2tOz+w0ToSNzKswNqhTpRl7W61" + + "gzDCFJu6IYreH9bH5eh/Z9BzjNOs16k0Ok2PmQhOhHYCT3fdkKogriSREVN5dlHi" + + "FV7eB577AgMBAAECggEAPGfLX+8zmDjogDsVVT/szzWt94zLLbyDollb1z1whjzn" + + "zqb31AWK8WMbjF8/6cO8DA77j5FMgYd6m3Q+RaajBdF1s6lE4ha68jHNl/Ue7P9J" + + "4WhmgDnYqzSPW8IDew4d9Sk1lqQqd0E/vIE2TyfHydAfNl+dgISKcUgur1TY52rb" + + "taldnMP44BoXSeKM1qMAE7tWXDQlRjDdcx2Vn6nKJ4iCC6490JSGaFpsoock9wkF" + + "Fi1euzVnvX3ksyioXHMZwzZ9ErCHsI+Px25xiroyloxeoj0zfcA8kZcC9vyoa9HF" + + "2p62iK6RM7JCQc7yMcSN2Fp8PzyHlOLgdI+8CKV4AQKBgQDYmVFenIbapLgN3uyW" + + "gPTgUQGdnLf2S1g1HHHw7+74aZuMKq20w8Ikv6qWMx07R05gm8yxQ1Z4ciLcEw2z" + + "KBurLte/t6ZAJXQ7wnbPyX1JPFQNxKJrPKq+FynnANrdPVgwUunmO9JJbsudU/cG" + + "WKaQiG0w5ltvXg1NY5i1doifawKBgQDT6HFxh31nGUySNRQloE9mpvbzT35ornvl" + + "0oMlCYX2M52C3/nH/rq30woP4hDMBlvq3V6blOzPHzQwlu4+4OKBqvxlAluYIoXP" + + "QD1vJhb7eti+mYnIWyQ6hnAhrg/WDxn69mixEson2EL68+WRawz61h3WbfKoivbe" + + "YP02G2uysQKBgBOPFLf0boED6tLl1HtqvbIb3od7BWmqOBbjsK5PHEc2UiOAHxt5" + + "qehjnmXdy7/0mnFC4GMJb5+Evv0cg1owPv9gRX88eDjGqQ5UayIsUbHxTq3HmdsR" + + "KWHs+Y2wmBLuXS5P7msp771N0fktAduC2denWiTWSF9wIMdiPQH16DRtAoGBAKs4" + + "ABmEKT4ZgfYMryervRwrQhPcIj5A5VkP2+kcJcKFd/pcMH15A7Mt8M5ekcXYSYKe" + + "tSeukBzWkJvGB+CEYl/1IRQYcJufIVERDdJ2C1HMs75lXp+ljMNBBu8frin+b7aI" + + "TJTuoqrJIW2VjeMOhSFTyi4NDmlCRy/tXArQ4xcxAoGAUppOsJZeF/1kPQIFwBkS" + + "bVuGxMscWKswHy6dXEq2VabVGBL8H33PkpJRBnw7S/f+8wvk9dX63NuTF6VYM546" + + "J73YadnpU82C+7OnaTTCDVPfXYgPFLpE9xKFKkRFacgUbEnvZ2i0zSUquH0RAyaK" + + "tJ0d/dnd5TQUccAZwT8Nrw0="; + + //Todo - not in pkcs8 form + String pubKeyString = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAs0sI/nP7okc+QDog1rFe" + + "acSa3c5Q0VdjyisERgs4b9TBD8EArxaesGUQ4AhOBH6VVLgcjSJ1dXjn6wY8CJca" + + "fIb/UT2AgLDwhVeOlS3mbK/BTn76iOiLMGKgd6sHYuTVvgriUS4ExST/O1+RoKCL" + + "VZxNPw8SLmaG7mhBMz2vE1fJFL3Xoc8U5ZDE2suSf1EnsirFIz6M6oCrTeEnAlq0" + + "MdhVxLiqAoPOcDYBUSR4EhgR9O5TQGFdrTs/sNE6EjcyrMDaoU6UZe1utYMwwhSb" + + "uiGK3h/Wx+Xof2fQc4zTrNepNDpNj5kIToR2Ak933ZCqIK4kkRFTeXZR4hVe3gee" + + "+wIDAQAB"; + + byte[] encoded = Base64.getDecoder().decode(privKeyString); + try { + KeyFactory keyFactory = KeyFactory.getInstance("RSA"); + PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(encoded); + RSAPrivateKey privKey = (RSAPrivateKey) keyFactory.generatePrivate(keySpec); + RSAPublicKey publicKey; + /* + * If public key is needed: encoded = Base64.decodeBase64(publicKeyPEM); + * + * KeyFactory keyFactory = KeyFactory.getInstance("RSA"); X509EncodedKeySpec + * keySpec = new X509EncodedKeySpec(encoded); return (RSAPublicKey) + * keyFactory.generatePublic(keySpec); RSAPublicKey publicKey = new + * RSAPublicKey(System.getProperty(RS256_KEY)); + */ + Algorithm algorithmRSA = Algorithm.RSA256(null, privKey); + String token1 = DRSSubmitToArchiveCommand.createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), "{\"stuff\":\"important\"}", 5); + + System.out.println("JWT: " + token1); + DecodedJWT jwt = JWT.decode(token1); + System.out.println(jwt.getPayload()); + } catch (Exception e) { + System.out.println(e.getLocalizedMessage()); + Assert.fail(e.getLocalizedMessage()); + } + + } +} From 
cf9363211ef3e66842f9725f0d26ddd2160a1a76 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 14:33:54 -0400 Subject: [PATCH 037/608] catch exception, log canonical form --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index aa4d1255477..b1312f8f103 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -239,6 +239,9 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } catch (NoSuchAlgorithmException e) { // TODO Auto-generated catch block e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); } } else { @@ -259,6 +262,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t public static String createJWTString(Algorithm algorithmRSA, String installationBrandName, String body, int expirationInMinutes) throws IOException { String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); + logger.fine("Canonical body: " + canonicalBody); String digest = DigestUtils.sha256Hex(canonicalBody); return JWT.create().withIssuer(BrandingUtil.getInstallationBrandName()).withIssuedAt(Date.from(Instant.now())) .withExpiresAt(Date.from(Instant.now().plusSeconds(60 * expirationInMinutes))) From a1c4484b40e5aec8043907df1fe59df98d37a552 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 15:21:59 -0400 Subject: [PATCH 038/608] refactor, add initial archiving status display --- .../harvard/iq/dataverse/DatasetVersion.java | 41 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/Admin.java | 12 +++--- .../impl/AbstractSubmitToArchiveCommand.java | 9 ---- src/main/webapp/dataset-versions.xhtml | 15 ++++--- 4 files changed, 57 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index f211ccd0410..4007f7f1cbc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.DateUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.workflows.WorkflowComment; import java.io.Serializable; @@ -26,6 +27,7 @@ import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonArrayBuilder; +import javax.json.JsonObject; import javax.json.JsonObjectBuilder; import javax.persistence.CascadeType; import javax.persistence.Column; @@ -93,6 +95,14 @@ public enum VersionState { public static final int ARCHIVE_NOTE_MAX_LENGTH = 1000; public static final int VERSION_NOTE_MAX_LENGTH = 1000; + //Archival copies: Status message required components + public static final String STATUS = "status"; + public static final String MESSAGE = "message"; + //Archival Copies: Allowed Statuses + public static final String PENDING = "pending"; + public static final String SUCCESS = "success"; + public static final String FAILURE = "failure"; + @Id @GeneratedValue(strategy = 
GenerationType.IDENTITY) private Long id; @@ -179,6 +189,8 @@ public enum VersionState { @Transient private DatasetVersionDifference dvd; + @Transient + private JsonObject archivalStatus; public Long getId() { return this.id; @@ -318,9 +330,38 @@ public void setArchiveNote(String note) { public String getArchivalCopyLocation() { return archivalCopyLocation; } + + public String getArchivalCopyLocationStatus() { + populateArchivalStatus(); + + if(archivalStatus!=null) { + return archivalStatus.getString(STATUS); + } + return null; + } + public String getArchivalCopyLocationMessage() { + populateArchivalStatus(); + if(archivalStatus!=null) { + return archivalStatus.getString(MESSAGE); + } + return null; + } + + private void populateArchivalStatus() { + if(archivalStatus ==null) { + if(archivalCopyLocation!=null) { + try { + archivalStatus = JsonUtil.getJsonObject(archivalCopyLocation); + } catch(Exception e) { + logger.warning("DatasetVersion id: " + id + " has a non-JsonObject value, parsing error: " + e.getMessage()); + } + } + } + } public void setArchivalCopyLocation(String location) { this.archivalCopyLocation = location; + populateArchivalStatus(); } public String getDeaccessionLink() { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index d90a99aa674..93a6abee9fa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1825,12 +1825,12 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, } catch (WrappedResponse e1) { return error(Status.UNAUTHORIZED, "api key required"); } - if (update.containsKey(AbstractSubmitToArchiveCommand.STATUS) - && update.containsKey(AbstractSubmitToArchiveCommand.MESSAGE)) { - String status = update.getString(AbstractSubmitToArchiveCommand.STATUS); - if (status.equals(AbstractSubmitToArchiveCommand.PENDING) - || status.equals(AbstractSubmitToArchiveCommand.FAILURE) - || status.equals(AbstractSubmitToArchiveCommand.SUCCESS)) { + if (update.containsKey(DatasetVersion.STATUS) + && update.containsKey(DatasetVersion.MESSAGE)) { + String status = update.getString(DatasetVersion.STATUS); + if (status.equals(DatasetVersion.PENDING) + || status.equals(DatasetVersion.FAILURE) + || status.equals(DatasetVersion.SUCCESS)) { try { Dataset ds; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index dad17df38c6..e919f81e6e9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -27,6 +27,15 @@ public abstract class AbstractSubmitToArchiveCommand extends AbstractCommand requestedSettings = new HashMap(); private static final Logger logger = Logger.getLogger(AbstractSubmitToArchiveCommand.class.getName()); - //Status message required components - public static final String STATUS = "status"; - public static final String MESSAGE = "message"; - //Allowed Statuses - public static final String PENDING = "pending"; - public static final String SUCCESS = "success"; - public static final String FAILURE = "failure"; - - public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version.getDataset()); this.version = version; diff --git
a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index e105ac30df7..00462acc9c2 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -149,13 +149,18 @@ - - + + + - + + + + - - + + From 8075a7156d86c90197c16d7bf835da0cc14a184c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 18:43:47 -0400 Subject: [PATCH 039/608] revert TDL change --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 042933a3b67..ab5a915e7e9 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -129,7 +129,7 @@ - 5.10-tdl-dev + 5.10.1 11 UTF-8 From a10212177be59ea59e993593a2dec400f2288fb1 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 18:44:04 -0400 Subject: [PATCH 040/608] fix typos/errors --- src/main/webapp/dataset-versions.xhtml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 00462acc9c2..70f53ea9a75 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -149,14 +149,14 @@ - + - - + + From 96bd083d488233c5b9837e625cd87ef94d779495 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 18:44:51 -0400 Subject: [PATCH 041/608] handle key with line breaks, add/update additional statuses for fails --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index b1312f8f103..83997c11d50 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -105,11 +105,12 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); JsonObjectBuilder statusObject = Json.createObjectBuilder(); - statusObject.add("status", "Failure"); + statusObject.add("status", DatasetVersion.FAILURE); statusObject.add("message", "Bag not transferred"); if (s3Result == WorkflowStepResult.OK) { - statusObject.add("status", "Attempted"); + //This will be overwritten if the further steps are successful + statusObject.add("status", DatasetVersion.FAILURE); statusObject.add("message", "Bag transferred"); // Now contact DRS @@ -168,7 +169,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t ingestPost = new HttpPost(); ingestPost.setURI(new URI(drsConfigObject.getString("DRSendpoint"))); - byte[] encoded = Base64.getDecoder().decode(System.getProperty(RSA_KEY)); + byte[] encoded = Base64.getDecoder().decode(System.getProperty(RSA_KEY).replaceAll("[\\r\\n]", "")); KeyFactory keyFactory = KeyFactory.getInstance("RSA"); PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(encoded); @@ -211,6 +212,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t statusObject.add("message", responseObject.getString("message")); break; case FAILURE: + statusObject.add("status", status); + statusObject.add("message", responseObject.getString("message")); logger.severe( "DRS Ingest Failed for: " 
+ packageId + " : " + responseObject.toString()); return new Failure("DRS Archiver fail in Ingest call"); From 3e860ea4aea7475279968ec3b0070e310977d83e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 18:59:25 -0400 Subject: [PATCH 042/608] add realistic body, print canonical form --- .../impl/DRSSubmitToArchiveCommandTest.java | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java index bf6f4dd8a4e..0bfd8ac18f2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.branding.BrandingUtil; import org.apache.commons.codec.digest.DigestUtils; +import org.erdtman.jcs.JsonCanonicalizer; import org.junit.Assert; import org.junit.Test; @@ -64,6 +65,32 @@ public void createJWT() throws CommandException { + "uiGK3h/Wx+Xof2fQc4zTrNepNDpNj5kIToR2Ak933ZCqIK4kkRFTeXZR4hVe3gee" + "+wIDAQAB"; + String fakeBody = "{\n" + + " \"s3_bucket_name\": \"dataverse-export-dev\",\n" + + " \"package_id\": \"doi-10-5072-fk2-e6cmkr.v1.18\",\n" + + " \"s3_path\": \"doi-10-5072-fk2-e6cmkr\",\n" + + " \"admin_metadata\": {\n" + + " \"accessFlag\": \"N\",\n" + + " \"contentModel\": \"opaque\",\n" + + " \"depositingSystem\": \"Harvard Dataverse\",\n" + + " \"firstGenerationInDrs\": \"unspecified\",\n" + + " \"objectRole\": \"CG:DATASET\",\n" + + " \"usageClass\": \"LOWUSE\",\n" + + " \"storageClass\": \"AR\",\n" + + " \"s3_bucket_name\": \"dataverse-export-dev\",\n" + + " \"ownerCode\": \"123\",\n" + + " \"billingCode\": \"456\",\n" + + " \"resourceNamePattern\": \"pattern\",\n" + + " \"urnAuthorityPath\": \"path\",\n" + + " \"depositAgent\": \"789\",\n" + + " \"depositAgentEmail\": \"someone@mailinator.com\",\n" + + " \"successEmail\": \"winner@mailinator.com\",\n" + + " \"failureEmail\": \"loser@mailinator.com\",\n" + + " \"successMethod\": \"method\",\n" + + " \"adminCategory\": \"root\"\n" + + " }\n" + + "}"; + byte[] encoded = Base64.getDecoder().decode(privKeyString); try { KeyFactory keyFactory = KeyFactory.getInstance("RSA"); @@ -77,9 +104,14 @@ public void createJWT() throws CommandException { * keySpec = new X509EncodedKeySpec(encoded); return (RSAPublicKey) * keyFactory.generatePublic(keySpec); RSAPublicKey publicKey = new * RSAPublicKey(System.getProperty(RS256_KEY)); + * + * */ + String canonicalBody = new JsonCanonicalizer(fakeBody).getEncodedString(); + System.out.println("Canonical form:"+ canonicalBody); + Algorithm algorithmRSA = Algorithm.RSA256(null, privKey); - String token1 = DRSSubmitToArchiveCommand.createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), "{\"stuff\":\"important\"}", 5); + String token1 = DRSSubmitToArchiveCommand.createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), fakeBody, 5); System.out.println("JWT: " + token1); DecodedJWT jwt = JWT.decode(token1); From 24336e176eb43c1be911c5c4f1c89544071c7d20 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Apr 2022 11:06:23 -0400 Subject: [PATCH 043/608] change api names, fix ok response on status put --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 93a6abee9fa..807eb089c16 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1787,7 +1787,7 @@ public void run() { @GET @Produces(MediaType.APPLICATION_JSON) - @Path("/submitDataVersionToArchive/{id}/{version}/status") + @Path("/submitDatasetVersionToArchive/{id}/{version}/status") public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, @PathParam("version") String versionNumber) { @@ -1812,7 +1812,7 @@ public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, @PUT @Consumes(MediaType.APPLICATION_JSON) - @Path("/submitDataVersionToArchive/{id}/{version}/status") + @Path("/submitDatasetVersionToArchive/{id}/{version}/status") public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, @PathParam("version") String versionNumber, JsonObject update) { @@ -1842,6 +1842,7 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, return error(Status.NOT_FOUND, "Dataset version not found"); } dv.setArchivalCopyLocation(JsonUtil.prettyPrint(update)); + return ok("Status updated"); } catch (WrappedResponse e) { return error(Status.NOT_FOUND, "Dataset not found"); @@ -1850,9 +1851,6 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, } return error(Status.BAD_REQUEST, "Unacceptable status format"); } - - - /** * Iteratively archives all unarchived dataset versions From 9cef6d4a16a8e5d204f512f16089e17588906420 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Apr 2022 11:06:39 -0400 Subject: [PATCH 044/608] store extra fields from ingest --- .../impl/DRSSubmitToArchiveCommand.java | 52 ++++++++++++------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 83997c11d50..d8cbfe5b114 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -63,8 +63,6 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implemen private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); private static final String DRS_CONFIG = ":DRSArchivalConfig"; - private static final String FAILURE = "failure"; - private static final String PENDING = "pending"; private static final String ADMIN_METADATA = "admin_metadata"; private static final String S3_BUCKET_NAME = "s3_bucket_name"; private static final String S3_PATH = "s3_path"; @@ -111,7 +109,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t if (s3Result == WorkflowStepResult.OK) { //This will be overwritten if the further steps are successful statusObject.add("status", DatasetVersion.FAILURE); - statusObject.add("message", "Bag transferred"); + statusObject.add("message", "Bag transferred, ingest failed"); // Now contact DRS boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); @@ -203,26 +201,40 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.info("Status: " + code); logger.info("Response" + responseBody); JsonObject responseObject = JsonUtil.getJsonObject(responseBody); - 
String status = responseObject.getString("status"); - switch (status) { - case PENDING: - logger.info("DRS Ingest successfully started for: " + packageId + " : " - + responseObject.toString()); - statusObject.add("status", status); - statusObject.add("message", responseObject.getString("message")); - break; - case FAILURE: - statusObject.add("status", status); - statusObject.add("message", responseObject.getString("message")); - logger.severe( - "DRS Ingest Failed for: " + packageId + " : " + responseObject.toString()); - return new Failure("DRS Archiver fail in Ingest call"); - default: - logger.warning("Unexpected Status: " + status); + if (responseObject.containsKey(DatasetVersion.STATUS) + && responseObject.containsKey(DatasetVersion.MESSAGE)) { + String status = responseObject.getString(DatasetVersion.STATUS); + if (status.equals(DatasetVersion.PENDING) || status.equals(DatasetVersion.FAILURE) + || status.equals(DatasetVersion.SUCCESS)) { + statusObject.addAll(Json.createObjectBuilder(responseObject)); + switch (status) { + case DatasetVersion.PENDING: + logger.info("DRS Ingest successfully started for: " + packageId + " : " + + responseObject.toString()); + break; + case DatasetVersion.FAILURE: + logger.severe("DRS Ingest Failed for: " + packageId + " : " + + responseObject.toString()); + return new Failure("DRS Archiver fail in Ingest call"); + case DatasetVersion.SUCCESS: + // We don't expect this from DRS + logger.warning("Unexpected Status: " + status); + } + } else { + logger.severe("DRS Ingest Failed for: " + packageId + " with returned status: " + + status); + return new Failure( + "DRS Archiver fail in Ingest call with returned status: " + status); + } + } else { + logger.severe("DRS Ingest Failed for: " + packageId + + " - response does not include status and message"); + return new Failure( + "DRS Archiver fail in Ingest call \" - response does not include status and message"); } } else { logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); - return new Failure("DRS Archiver fail in Ingest call with status cvode: " + code); + return new Failure("DRS Archiver fail in Ingest call with status code: " + code); } } catch (ClientProtocolException e2) { e2.printStackTrace(); From f1887d5067c65a7ea52391a57e1daca831891afa Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Apr 2022 11:07:08 -0400 Subject: [PATCH 045/608] show basic archival state to those who can viewunpublisheddataset --- src/main/webapp/dataset-versions.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 70f53ea9a75..aaaaf8d4593 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -147,7 +147,7 @@ - + From ee40121a87445ddf1c400ba0401e0b7d51ca5945 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 13 Apr 2022 10:32:00 -0400 Subject: [PATCH 046/608] Don't check permissions when dataset id is null (during create). Not sure why the dataset-version.xhtml is getting included at all on the dataset create page - should stop this instead of just avoiding a failure in the render logic for the "Archived" column.
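For reference, the archival status record these patches standardize on is a small JSON object stored in the DatasetVersion's archivalCopyLocation field, with exactly the two required keys defined by the constants added above: "status" (one of "pending", "success", or "failure") and "message" (free-form detail). Below is a minimal sketch of producing and consuming that record with the javax.json API the patches themselves use; the class name and message text are illustrative only, not taken from the patches:

    import java.io.StringReader;
    import javax.json.Json;
    import javax.json.JsonObject;
    import javax.json.JsonReader;

    public class ArchivalStatusSketch {
        public static void main(String[] args) {
            // Build the status object the way the archiver commands do
            JsonObject status = Json.createObjectBuilder()
                    .add("status", "pending") // allowed values: pending | success | failure
                    .add("message", "Bag transferred, ingest started") // illustrative message
                    .build();
            String archivalCopyLocation = status.toString();

            // Parse it back, as populateArchivalStatus() does with the stored value
            try (JsonReader reader = Json.createReader(new StringReader(archivalCopyLocation))) {
                JsonObject parsed = reader.readObject();
                System.out.println(parsed.getString("status") + ": " + parsed.getString("message"));
            }
        }
    }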
--- src/main/webapp/dataset-versions.xhtml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index aaaaf8d4593..37f7906c640 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -147,7 +147,8 @@ - + + From 2d5fc86360fdc434450a2fe74c276ba22aa98a49 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 13 Apr 2022 16:18:43 -0400 Subject: [PATCH 047/608] move archiving status api to datasets, fix merge/caching issues Since api/admin is usually blocked, putting the status apis in api/datasets makes it easier for a remote archiving service to set status. Still keeping the calls to start archiving manually/start a batch archiving job in admin. Also fixed the display status caching issue and adjusted for the fact that Admin is a @Stateless bean and Datasets is not (the former appears to update the db when dv.setArchivalCopyLocation is set, the latter doesn't unless a merge is called.) --- .../harvard/iq/dataverse/DatasetVersion.java | 10 +-- .../dataverse/DatasetVersionServiceBean.java | 8 ++ .../edu/harvard/iq/dataverse/api/Admin.java | 67 +--------------- .../harvard/iq/dataverse/api/Datasets.java | 80 ++++++++++++++++++- 4 files changed, 93 insertions(+), 72 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 4007f7f1cbc..8d10f970786 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -332,7 +332,7 @@ public String getArchivalCopyLocation() { } public String getArchivalCopyLocationStatus() { - populateArchivalStatus(); + populateArchivalStatus(false); if(archivalStatus!=null) { return archivalStatus.getString(STATUS); @@ -340,15 +340,15 @@ public String getArchivalCopyLocationStatus() { return null; } public String getArchivalCopyLocationMessage() { - populateArchivalStatus(); + populateArchivalStatus(false); if(archivalStatus!=null) { return archivalStatus.getString(MESSAGE); } return null; } - private void populateArchivalStatus() { - if(archivalStatus ==null) { + private void populateArchivalStatus(boolean force) { + if(archivalStatus ==null || force) { if(archivalCopyLocation!=null) { try { archivalStatus = JsonUtil.getJsonObject(archivalCopyLocation); } catch(Exception e) { logger.warning("DatasetVersion id: " + id + "has a non-JsonObject value, parsing error: " + e.getMessage()); @@ -361,7 +361,7 @@ private void populateArchivalStatus() { public void setArchivalCopyLocation(String location) { this.archivalCopyLocation = location; - populateArchivalStatus(); + populateArchivalStatus(true); } public String getDeaccessionLink() { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 150cd656aed..9c79646e7d4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1211,4 +1211,12 @@ public List getUnarchivedDatasetVersions(){ } } // end getUnarchivedDatasetVersions + /** + * Merges the passed datasetversion to the persistence context. + * @param ver the DatasetVersion whose new state we want to persist. + * @return The managed entity representing {@code ver}.
+ */ + public DatasetVersion merge( DatasetVersion ver ) { + return em.merge(ver); + } } // end class diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 807eb089c16..440d64985df 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -18,6 +18,7 @@ import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.UserServiceBean; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; +import edu.harvard.iq.dataverse.api.AbstractApiBean.WrappedResponse; import edu.harvard.iq.dataverse.api.dto.RoleDTO; import edu.harvard.iq.dataverse.authorization.AuthenticatedUserDisplayInfo; import edu.harvard.iq.dataverse.authorization.AuthenticationProvider; @@ -1784,73 +1785,7 @@ public void run() { return error(Status.UNAUTHORIZED, "api key required"); } } - - @GET - @Produces(MediaType.APPLICATION_JSON) - @Path("/submitDatasetVersionToArchive/{id}/{version}/status") - public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, - @PathParam("version") String versionNumber) { - - try { - AuthenticatedUser au = findAuthenticatedUserOrDie(); - if (!au.isSuperuser()) { - return error(Response.Status.FORBIDDEN, "Superusers only."); - } - Dataset ds = findDatasetOrDie(dsid); - - DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); - if (dv.getArchivalCopyLocation() == null) { - return error(Status.NO_CONTENT, "This dataset version has not been archived"); - } else { - JsonObject status = JsonUtil.getJsonObject(dv.getArchivalCopyLocation()); - return ok(status); - } - } catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); - } - } - - @PUT - @Consumes(MediaType.APPLICATION_JSON) - @Path("/submitDatasetVersionToArchive/{id}/{version}/status") - public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, - @PathParam("version") String versionNumber, JsonObject update) { - - try { - AuthenticatedUser au = findAuthenticatedUserOrDie(); - - if (!au.isSuperuser()) { - return error(Response.Status.FORBIDDEN, "Superusers only."); - } - } catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); - } - if (update.containsKey(DatasetVersion.STATUS) - && update.containsKey(DatasetVersion.MESSAGE)) { - String status = update.getString(DatasetVersion.STATUS); - if (status.equals(DatasetVersion.PENDING) - || status.equals(DatasetVersion.FAILURE) - || status.equals(DatasetVersion.SUCCESS)) { - - try { - Dataset ds; - - ds = findDatasetOrDie(dsid); - - DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); - if(dv==null) { - return error(Status.NOT_FOUND, "Dataset version not found"); - } - dv.setArchivalCopyLocation(JsonUtil.prettyPrint(update)); - return ok("Status updated"); - } catch (WrappedResponse e) { - return error(Status.NOT_FOUND, "Dataset not found"); - } - } - } - return error(Status.BAD_REQUEST, "Unacceptable status format"); - } /** * Iteratively archives all unarchived dataset versions diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index e21396dd487..1bd8384d4b0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -57,7 +57,7 @@ import edu.harvard.iq.dataverse.export.ExportService; import 
edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; - +import edu.harvard.iq.dataverse.api.AbstractApiBean.WrappedResponse; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; @@ -86,6 +86,7 @@ import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.search.IndexServiceBean; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @@ -217,6 +218,9 @@ public class Datasets extends AbstractApiBean { @Inject DataverseRoleServiceBean dataverseRoleService; + + @EJB + DatasetVersionServiceBean datasetversionService; /** * Used to consolidate the way we parse and handle dataset versions. @@ -3279,4 +3283,78 @@ public Response getCurationStates() throws WrappedResponse { csvSB.append("\n"); return ok(csvSB.toString(), MediaType.valueOf(FileUtil.MIME_TYPE_CSV), "datasets.status.csv"); } + + + @GET + @Produces(MediaType.APPLICATION_JSON) + @Path("/submitDatasetVersionToArchive/{id}/{version}/status") + public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, + @PathParam("version") String versionNumber) { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + Dataset ds = findDatasetOrDie(dsid); + + DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); + if (dv.getArchivalCopyLocation() == null) { + return error(Status.NO_CONTENT, "This dataset version has not been archived"); + } else { + JsonObject status = JsonUtil.getJsonObject(dv.getArchivalCopyLocation()); + return ok(status); + } + } catch (WrappedResponse e1) { + return error(Status.UNAUTHORIZED, "api key required"); + } + } + + @PUT + @Consumes(MediaType.APPLICATION_JSON) + @Path("/submitDatasetVersionToArchive/{id}/{version}/status") + public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, + @PathParam("version") String versionNumber, JsonObject update) { + + logger.info(JsonUtil.prettyPrint(update)); + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + } catch (WrappedResponse e1) { + return error(Status.UNAUTHORIZED, "api key required"); + } + if (update.containsKey(DatasetVersion.STATUS) + && update.containsKey(DatasetVersion.MESSAGE)) { + String status = update.getString(DatasetVersion.STATUS); + if (status.equals(DatasetVersion.PENDING) + || status.equals(DatasetVersion.FAILURE) + || status.equals(DatasetVersion.SUCCESS)) { + + try { + Dataset ds; + + ds = findDatasetOrDie(dsid); + + DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); + if(dv==null) { + return error(Status.NOT_FOUND, "Dataset version not found"); + } + dv.setArchivalCopyLocation(JsonUtil.prettyPrint(update)); + dv = datasetversionService.merge(dv); + logger.info("location now: " + dv.getArchivalCopyLocation()); + logger.info("status now: " + dv.getArchivalCopyLocationStatus()); + logger.info("message now: " + dv.getArchivalCopyLocationMessage()); + + return ok("Status 
updated"); + + } catch (WrappedResponse e) { + return error(Status.NOT_FOUND, "Dataset not found"); + } + } + } + return error(Status.BAD_REQUEST, "Unacceptable status format"); + } } From 51c1ba8a569725bd0372156206f463eb2fa27df2 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 27 Apr 2022 14:06:59 -0400 Subject: [PATCH 048/608] fix wrapped error handling --- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index c58df903652..baa9644700e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3308,8 +3308,8 @@ public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, JsonObject status = JsonUtil.getJsonObject(dv.getArchivalCopyLocation()); return ok(status); } - } catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); + } catch (WrappedResponse wr) { + return wr.getResponse(); } } @@ -3326,8 +3326,8 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, if (!au.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Superusers only."); } - } catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); + } catch (WrappedResponse wr) { + return wr.getResponse(); } if (update.containsKey(DatasetVersion.STATUS) && update.containsKey(DatasetVersion.MESSAGE)) { @@ -3353,8 +3353,8 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, return ok("Status updated"); - } catch (WrappedResponse e) { - return error(Status.NOT_FOUND, "Dataset not found"); + } catch (WrappedResponse wr) { + return wr.getResponse(); } } } From 5b2936321e6628a544fb3a4d142e44a0729f2028 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 27 Apr 2022 14:07:39 -0400 Subject: [PATCH 049/608] add debug logging for 5.2021.5 --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 6adadb98429..db377090cf6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -355,6 +355,7 @@ private void populateArchivalStatus(boolean force) { archivalStatus = JsonUtil.getJsonObject(archivalCopyLocation); } catch(Exception e) { logger.warning("DatasetVersion id: " + id + "has a non-JsonObject value, parsing error: " + e.getMessage()); + logger.info(archivalCopyLocation); } } } From 9aac7e95c1a7b14b271b63b41ab032886259defc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 27 Apr 2022 14:12:10 -0400 Subject: [PATCH 050/608] fix header, error status, debug logging --- .../command/impl/DRSSubmitToArchiveCommand.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index d8cbfe5b114..391ecb7b1d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -109,7 +109,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion 
dv, ApiToken t if (s3Result == WorkflowStepResult.OK) { //This will be overwritten if the further steps are successful statusObject.add("status", DatasetVersion.FAILURE); - statusObject.add("message", "Bag transferred, ingest failed"); + statusObject.add("message", "Bag transferred, DRS ingest call failed"); // Now contact DRS boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); @@ -187,7 +187,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String jwtString = createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), body, 5); logger.info("JWT: " + jwtString); - ingestPost.setHeader("Authorization: Bearer", jwtString); + ingestPost.setHeader("Authorization", "Bearer " + jwtString); logger.info("Body: " + body); ingestPost.setEntity(new StringEntity(body, "utf-8")); @@ -234,6 +234,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } } else { logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); + logger.info("Status: " + code); + logger.info("Response" + responseBody); return new Failure("DRS Archiver fail in Ingest call with status code: " + code); } } catch (ClientProtocolException e2) { @@ -249,21 +251,21 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } // execute catch (InvalidKeySpecException e) { - // TODO Auto-generated catch block e.printStackTrace(); } catch (NoSuchAlgorithmException e) { -// TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); + } finally { + //Set status after success or failure + dv.setArchivalCopyLocation(statusObject.build().toString()); } } else { - logger.warning("DRS: S3 archiving failed - will not call ingest: " + packageId); + dv.setArchivalCopyLocation(statusObject.build().toString()); return new Failure("DRS Archiver fail in initial S3 Archiver transfer"); } - dv.setArchivalCopyLocation(statusObject.build().toString()); + } else { logger.info("DRS Archiver: No matching collection found - will not archive: " + packageId); return WorkflowStepResult.OK; From ac234374cf02f712c2c24da4fc13e1a39a80172b Mon Sep 17 00:00:00 2001 From: Robert Treacy Date: Mon, 2 May 2022 15:19:54 -0400 Subject: [PATCH 051/608] add signed Url to header and use POST for external tools, in particular DPCreator WIP - still need to handle use of signed Url to access resource on dataverse --- .../iq/dataverse/ConfigureFragmentBean.java | 1 + .../externaltools/ExternalToolHandler.java | 150 ++++-------------- 2 files changed, 31 insertions(+), 120 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ConfigureFragmentBean.java b/src/main/java/edu/harvard/iq/dataverse/ConfigureFragmentBean.java index d51a73fd2dc..58752af8520 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ConfigureFragmentBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ConfigureFragmentBean.java @@ -106,6 +106,7 @@ public void generateApiToken() { ApiToken apiToken = new ApiToken(); User user = session.getUser(); if (user instanceof AuthenticatedUser) { + toolHandler.setUser(((AuthenticatedUser) user).getUserIdentifier()); apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); if (apiToken == null) { //No un-expired token diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index 84d5b75e34c..baa386485d3 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -2,31 +2,28 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.externaltools.ExternalTool.ReservedWord; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; import java.io.IOException; import java.io.StringReader; import java.net.HttpURLConnection; import java.net.URI; -import java.net.URLEncoder; import java.net.http.HttpClient; import java.net.http.HttpRequest; import java.net.http.HttpResponse; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonObject; import javax.json.JsonReader; +import javax.json.JsonString; import javax.ws.rs.HttpMethod; /** @@ -36,6 +33,13 @@ */ public class ExternalToolHandler { + /** + * @param user the user to set + */ + public void setUser(String user) { + this.user = user; + } + private static final Logger logger = Logger.getLogger(ExternalToolHandler.class.getCanonicalName()); private final ExternalTool externalTool; @@ -47,7 +51,9 @@ public class ExternalToolHandler { private String localeCode; private String requestMethod; private String toolContext; - + private String user; + private String siteUrl; + /** * File level tool * @@ -121,19 +127,11 @@ public String handleRequest() { // TODO: rename to handleRequest() to someday handle sending headers as well as query parameters. public String handleRequest(boolean preview) { - requestMethod = requestMethod(); - if (requestMethod().equals(HttpMethod.POST)){ - try { - return getFormData(); - } catch (IOException ex) { - Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); - } catch (InterruptedException ex) { - Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); - } - } String toolParameters = externalTool.getToolParameters(); JsonReader jsonReader = Json.createReader(new StringReader(toolParameters)); JsonObject obj = jsonReader.readObject(); + JsonString method = obj.getJsonString("httpMethod"); + requestMethod = method!=null?method.getString():HttpMethod.GET; JsonArray queryParams = obj.getJsonArray("queryParameters"); if (queryParams == null || queryParams.isEmpty()) { return ""; @@ -147,7 +145,14 @@ public String handleRequest(boolean preview) { params.add(param); } }); - }); + }); + if (requestMethod.equals(HttpMethod.POST)){ + try { + return postFormData(obj.getJsonNumber("timeOut").intValue(), params); + } catch (IOException | InterruptedException ex) { + Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); + } + } if (!preview) { return "?" 
+ String.join("&", params); } else { @@ -168,7 +173,8 @@ private String getQueryParam(String key, String value) { } break; case SITE_URL: - return key + "=" + SystemConfig.getDataverseSiteUrlStatic(); + siteUrl = SystemConfig.getDataverseSiteUrlStatic(); + return key + "=" + siteUrl; case API_TOKEN: String apiTokenString = null; ApiToken theApiToken = getApiToken(); @@ -209,85 +215,16 @@ private String getQueryParam(String key, String value) { return null; } - private String getFormDataValue(String key, String value) { - ReservedWord reservedWord = ReservedWord.fromString(value); - switch (reservedWord) { - case FILE_ID: - // getDataFile is never null for file tools because of the constructor - return ""+getDataFile().getId(); - case FILE_PID: - GlobalId filePid = getDataFile().getGlobalId(); - if (filePid != null) { - return ""+getDataFile().getGlobalId(); - } - break; - case SITE_URL: - return ""+SystemConfig.getDataverseSiteUrlStatic(); - case API_TOKEN: - String apiTokenString = null; - ApiToken theApiToken = getApiToken(); - if (theApiToken != null) { - apiTokenString = theApiToken.getTokenString(); - return "" + apiTokenString; - } - break; - case DATASET_ID: - return "" + dataset.getId(); - case DATASET_PID: - return "" + dataset.getGlobalId().asString(); - case DATASET_VERSION: - String versionString = null; - if(fileMetadata!=null) { //true for file case - versionString = fileMetadata.getDatasetVersion().getFriendlyVersionNumber(); - } else { //Dataset case - return the latest visible version (unless/until the dataset case allows specifying a version) - if (getApiToken() != null) { - versionString = dataset.getLatestVersion().getFriendlyVersionNumber(); - } else { - versionString = dataset.getLatestVersionForCopy().getFriendlyVersionNumber(); - } - } - if (("DRAFT").equals(versionString)) { - versionString = ":draft"; // send the token needed in api calls that can be substituted for a numeric - // version. 
- } - return "" + versionString; - case FILE_METADATA_ID: - if(fileMetadata!=null) { //true for file case - return "" + fileMetadata.getId(); - } - case LOCALE_CODE: - return "" + getLocaleCode(); - default: - break; - } - return null; - } - - private String getFormData() throws IOException, InterruptedException{ + private String postFormData(Integer timeout,List params ) throws IOException, InterruptedException{ String url = ""; - String toolParameters = externalTool.getToolParameters(); - JsonReader jsonReader = Json.createReader(new StringReader(toolParameters)); - JsonObject obj = jsonReader.readObject(); - JsonArray queryParams = obj.getJsonArray("queryParameters"); - if (queryParams == null || queryParams.isEmpty()) { - return ""; - } - Map data = new HashMap<>(); - queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { - queryParam.keySet().forEach((key) -> { - String value = queryParam.getString(key); - String param = getFormDataValue(key, value); - if (param != null && !param.isEmpty()) { - data.put(key,param); - } - }); - }); +// Integer timeout = obj.getJsonNumber("timeOut").intValue(); + url = UrlSignerUtil.signUrl(siteUrl, timeout, user, HttpMethod.POST, getApiToken().getTokenString()); HttpClient client = HttpClient.newHttpClient(); - HttpRequest request = HttpRequest.newBuilder().POST(ofFormData(data)).uri(URI.create(externalTool.getToolUrl())) + HttpRequest request = HttpRequest.newBuilder().POST(HttpRequest.BodyPublishers.ofString(String.join("&", params))).uri(URI.create(externalTool.getToolUrl())) .header("Content-Type", "application/x-www-form-urlencoded") - .build(); - + .header("signedUrl", url) + .build(); HttpResponse response = client.send(request, HttpResponse.BodyHandlers.ofString()); boolean redirect=false; int status = response.statusCode(); @@ -300,40 +237,13 @@ private String getFormData() throws IOException, InterruptedException{ } if (redirect=true){ String newUrl = response.headers().firstValue("location").get(); - System.out.println(newUrl); toolContext = "http://" + response.uri().getAuthority(); url = newUrl; } - - System.out.println(response.statusCode()); - System.out.println(response.body()); - return url; - } - public static HttpRequest.BodyPublisher ofFormData(Map data) { - var builder = new StringBuilder(); - data.entrySet().stream().map((var entry) -> { - if (builder.length() > 0) { - builder.append("&"); - } - StringBuilder append = builder.append(URLEncoder.encode(entry.getKey().toString(), StandardCharsets.UTF_8)); - return entry; - }).forEachOrdered(entry -> { - builder.append("="); - builder.append(URLEncoder.encode(entry.getValue().toString(), StandardCharsets.UTF_8)); - }); - return HttpRequest.BodyPublishers.ofString(builder.toString()); - } - - // placeholder for a way to use the POST method instead of the GET method - public String requestMethod(){ - if (externalTool.getDisplayName().startsWith("DP")) - return HttpMethod.POST; - return HttpMethod.GET; - } public String getToolUrlWithQueryParams() { String params = ExternalToolHandler.this.handleRequest(); return toolContext + params; From d295d868d57aa41b458c0b5803990bb62f6cc558 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 4 May 2022 17:23:33 +0200 Subject: [PATCH 052/608] sorting of licenses with the new sort order column --- .../harvard/iq/dataverse/api/Licenses.java | 31 +++++++++++++++++++ .../harvard/iq/dataverse/license/License.java | 26 +++++++++++++--- .../dataverse/license/LicenseServiceBean.java | 12 +++++++ .../iq/dataverse/util/json/JsonPrinter.java 
| 3 +- .../V5.10.1.1__8671-sorting_licenses.sql | 9 ++++++ .../iq/dataverse/DatasetVersionTest.java | 2 +- .../harvard/iq/dataverse/api/LicensesIT.java | 14 +++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 9 +++++- .../export/SchemaDotOrgExporterTest.java | 2 +- .../iq/dataverse/util/FileUtilTest.java | 4 +-- 10 files changed, 101 insertions(+), 11 deletions(-) create mode 100644 src/main/resources/db/migration/V5.10.1.1__8671-sorting_licenses.sql diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Licenses.java b/src/main/java/edu/harvard/iq/dataverse/api/Licenses.java index 58e1f8cc2c5..1fdf7818cfb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Licenses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Licenses.java @@ -146,6 +146,37 @@ public Response setActiveState(@PathParam("id") long id, @PathParam("activeState } } + @PUT + @Path("/{id}/:sortOrder/{sortOrder}") + public Response setSortOrder(@PathParam("id") long id, @PathParam("sortOrder") long sortOrder) { + User authenticatedUser; + try { + authenticatedUser = findAuthenticatedUserOrDie(); + if (!authenticatedUser.isSuperuser()) { + return error(Status.FORBIDDEN, "must be superuser"); + } + } catch (WrappedResponse e) { + return error(Status.UNAUTHORIZED, "api key required"); + } + try { + if (licenseSvc.setSortOrder(id, sortOrder) == 0) { + return error(Response.Status.NOT_FOUND, "License with ID " + id + " not found"); + } + License license = licenseSvc.getById(id); + actionLogSvc + .log(new ActionLogRecord(ActionLogRecord.ActionType.Admin, "sortOrderLicenseChanged") + .setInfo("License " + license.getName() + "(" + license.getUri() + ") as id: " + id + + "has now sort order " + sortOrder + ".") + .setUserIdentifier(authenticatedUser.getIdentifier())); + return ok("License ID " + id + " sort order set to " + sortOrder); + } catch (WrappedResponse e) { + if (e.getCause() instanceof IllegalArgumentException) { + return badRequest(e.getCause().getMessage()); + } + return error(Response.Status.INTERNAL_SERVER_ERROR, e.getMessage()); + } + } + @DELETE @Path("/{id}") public Response deleteLicenseById(@PathParam("id") long id) { diff --git a/src/main/java/edu/harvard/iq/dataverse/license/License.java b/src/main/java/edu/harvard/iq/dataverse/license/License.java index 96baacc6731..4f99470d7b4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/license/License.java +++ b/src/main/java/edu/harvard/iq/dataverse/license/License.java @@ -23,9 +23,9 @@ */ @NamedQueries({ @NamedQuery( name="License.findAll", - query="SELECT l FROM License l ORDER BY (case when l.isDefault then 0 else 1 end), l.id asc"), + query="SELECT l FROM License l ORDER BY (case when l.isDefault then 0 else 1 end), l.sortOrder, l.name asc"), @NamedQuery( name="License.findAllActive", - query="SELECT l FROM License l WHERE l.active='true' ORDER BY (case when l.isDefault then 0 else 1 end), l.id asc"), + query="SELECT l FROM License l WHERE l.active='true' ORDER BY (case when l.isDefault then 0 else 1 end), l.sortOrder, l.name asc"), @NamedQuery( name="License.findById", query = "SELECT l FROM License l WHERE l.id=:id"), @NamedQuery( name="License.findDefault", @@ -42,6 +42,8 @@ query = "UPDATE License l SET l.isDefault='false'"), @NamedQuery( name="License.setActiveState", query = "UPDATE License l SET l.active=:state WHERE l.id=:id"), + @NamedQuery( name="License.setSortOrder", + query = "UPDATE License l SET l.sortOrder=:sortOrder WHERE l.id=:id"), }) @Entity @@ -73,6 +75,9 @@ public class License { @Column(nullable = false) private boolean 
isDefault; + + @Column(nullable = false) + private Long sortOrder; @OneToMany(mappedBy="license") private List termsOfUseAndAccess; @@ -80,7 +85,7 @@ public class License { public License() { } - public License(String name, String shortDescription, URI uri, URI iconUrl, boolean active) { + public License(String name, String shortDescription, URI uri, URI iconUrl, boolean active, Long sortOrder) { this.name = name; this.shortDescription = shortDescription; this.uri = uri.toASCIIString(); @@ -91,6 +96,7 @@ public License(String name, String shortDescription, URI uri, URI iconUrl, boole } this.active = active; isDefault = false; + this.sortOrder = sortOrder; } public Long getId() { @@ -172,17 +178,26 @@ public void setTermsOfUseAndAccess(List termsOfUseAndAccess this.termsOfUseAndAccess = termsOfUseAndAccess; } + public Long getSortOrder() { + return sortOrder; + } + + public void setSortOrder(Long sortOrder) { + this.sortOrder = sortOrder; + } + @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; License license = (License) o; - return active == license.active && id.equals(license.id) && name.equals(license.name) && shortDescription.equals(license.shortDescription) && uri.equals(license.uri) && Objects.equals(iconUrl, license.iconUrl); + return active == license.active && id.equals(license.id) && name.equals(license.name) && shortDescription.equals(license.shortDescription) && uri.equals(license.uri) && Objects.equals(iconUrl, license.iconUrl) + && Objects.equals(sortOrder, license.sortOrder); } @Override public int hashCode() { - return Objects.hash(id, name, shortDescription, uri, iconUrl, active); + return Objects.hash(id, name, shortDescription, uri, iconUrl, active, sortOrder); } @Override @@ -195,6 +210,7 @@ public String toString() { ", iconUrl=" + iconUrl + ", active=" + active + ", isDefault=" + isDefault + + ", sortOrder=" + sortOrder + '}'; } diff --git a/src/main/java/edu/harvard/iq/dataverse/license/LicenseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/license/LicenseServiceBean.java index c18e168685a..b554fecd437 100644 --- a/src/main/java/edu/harvard/iq/dataverse/license/LicenseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/license/LicenseServiceBean.java @@ -93,11 +93,23 @@ public int setActive(Long id, boolean state) throws WrappedResponse { new IllegalArgumentException("License already " + (state ? 
"active" : "inactive")), null); } } + + public int setSortOrder(Long id, Long sortOrder) throws WrappedResponse { + License candidate = getById(id); + if (candidate == null) + return 0; + + return em.createNamedQuery("License.setSortOrder").setParameter("id", id).setParameter("sortOrder", sortOrder) + .executeUpdate(); + } public License save(License license) throws WrappedResponse { if (license.getId() != null) { throw new WrappedResponse(new IllegalArgumentException("There shouldn't be an ID in the request body"), null); } + if (license.getSortOrder() == null) { + throw new WrappedResponse(new IllegalArgumentException("There should be a sort order value in the request body"), null); + } try { em.persist(license); em.flush(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index ed3460b6759..e4f15e8992b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -800,7 +800,8 @@ public static JsonObjectBuilder json(License license) { .add("uri", license.getUri().toString()) .add("iconUrl", license.getIconUrl() == null ? null : license.getIconUrl().toString()) .add("active", license.isActive()) - .add("isDefault", license.isDefault()); + .add("isDefault", license.isDefault()) + .add("sortOrder", license.getSortOrder()); } public static Collector stringsToJsonArray() { diff --git a/src/main/resources/db/migration/V5.10.1.1__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.10.1.1__8671-sorting_licenses.sql new file mode 100644 index 00000000000..5bc18e69df0 --- /dev/null +++ b/src/main/resources/db/migration/V5.10.1.1__8671-sorting_licenses.sql @@ -0,0 +1,9 @@ +ALTER TABLE license +ADD COLUMN IF NOT EXISTS sortorder BIGINT; + +UPDATE license +SET sortorder = id +WHERE sortorder IS NULL; + +CREATE INDEX IF NOT EXISTS license_sortorder_id +ON license (sortorder, id); \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetVersionTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetVersionTest.java index 884a2fd6244..a8e011d0036 100644 --- a/src/test/java/edu/harvard/iq/dataverse/DatasetVersionTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetVersionTest.java @@ -92,7 +92,7 @@ public void testIsInReview() { @Test public void testGetJsonLd() throws ParseException { Dataset dataset = new Dataset(); - License license = new License("CC0 1.0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0"), URI.create("/resources/images/cc0.png"), true); + License license = new License("CC0 1.0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0"), URI.create("/resources/images/cc0.png"), true, 1l); license.setDefault(true); dataset.setProtocol("doi"); dataset.setAuthority("10.5072/FK2"); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java index 09443732f09..e189336b61e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java @@ -144,6 +144,20 @@ public void testLicenses(){ status = JsonPath.from(body).getString("status"); assertEquals("OK", 
status); + //Fail trying to set null sort order + Response setSortOrderErrorResponse = UtilIT.setLicenseSortOrderById(activeLicenseId, null, adminApiToken); + setSortOrderErrorResponse.prettyPrint(); + body = setSortOrderErrorResponse.getBody().asString(); + status = JsonPath.from(body).getString("status"); + assertEquals("ERROR", status); + + //Succeed in setting sort order + Response setSortOrderResponse = UtilIT.setLicenseSortOrderById(activeLicenseId, 2l, adminApiToken); + setSortOrderResponse.prettyPrint(); + body = setSortOrderResponse.getBody().asString(); + status = JsonPath.from(body).getString("status"); + assertEquals("OK", status); + //Succeed in deleting our test license Response deleteLicenseByIdResponse = UtilIT.deleteLicenseById(licenseId, adminApiToken); deleteLicenseByIdResponse.prettyPrint(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 7b9b5f3b129..f9bdabe367b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2808,7 +2808,14 @@ static Response setLicenseActiveById(Long id, boolean state, String apiToken) { .put("/api/licenses/"+id.toString() + "/:active/" + state); return activateLicenseResponse; } - + + static Response setLicenseSortOrderById(Long id, Long sortOrder, String apiToken) { + Response setSortOrderLicenseResponse = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .urlEncodingEnabled(false) + .put("/api/licenses/"+id.toString() + "/:sortOrder/" + sortOrder); + return setSortOrderLicenseResponse; + } static Response updateDatasetJsonLDMetadata(Integer datasetId, String apiToken, String jsonLDBody, boolean replace) { Response response = given() diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..641eaf68a3e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -67,7 +67,7 @@ public static void tearDownClass() { public void testExportDataset() throws Exception { File datasetVersionJson = new File("src/test/resources/json/dataset-finch2.json"); String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); - License license = new License("CC0 1.0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0/"), URI.create("/resources/images/cc0.png"), true); + License license = new License("CC0 1.0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0/"), URI.create("/resources/images/cc0.png"), true, 1l); license.setDefault(true); JsonReader jsonReader1 = Json.createReader(new StringReader(datasetVersionAsJson)); diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java index 141e97b9b9b..7b5a5ef9d78 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java @@ -138,7 +138,7 @@ public void testIsDownloadPopupRequiredLicenseCC0() { DatasetVersion dsv1 = new DatasetVersion(); 
dsv1.setVersionState(DatasetVersion.VersionState.RELEASED); TermsOfUseAndAccess termsOfUseAndAccess = new TermsOfUseAndAccess(); - License license = new License("CC0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0"), URI.create("/resources/images/cc0.png"), true); + License license = new License("CC0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0"), URI.create("/resources/images/cc0.png"), true, 1l); license.setDefault(true); termsOfUseAndAccess.setLicense(license); dsv1.setTermsOfUseAndAccess(termsOfUseAndAccess); @@ -155,7 +155,7 @@ public void testIsDownloadPopupRequiredHasTermsOfUseAndCc0License() { * the popup when the are Terms of Use. This feels like a bug since the * Terms of Use should probably be shown. */ - License license = new License("CC0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0"), URI.create("/resources/images/cc0.png"), true); + License license = new License("CC0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0"), URI.create("/resources/images/cc0.png"), true, 2l); license.setDefault(true); termsOfUseAndAccess.setLicense(license); termsOfUseAndAccess.setTermsOfUse("be excellent to each other"); From 343155d4b15834721b0724b03ca34995d1b87ed9 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 5 May 2022 11:00:42 -0400 Subject: [PATCH 053/608] adding configurable timeout --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 391ecb7b1d6..8f1805f2b91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -72,6 +72,7 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implemen private static final String RSA_KEY = "dataverse.archiver.drs.rsa_key"; private static final String TRUST_CERT = "trust_cert"; + private static final String TIMEOUT = "timeout"; public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); @@ -113,7 +114,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Now contact DRS boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); - + int jwtTimeout = drsConfigObject.getInt(TIMEOUT, 5); JsonObjectBuilder job = Json.createObjectBuilder(); job.add(S3_BUCKET_NAME, adminMetadata.getString(S3_BUCKET_NAME)); @@ -184,7 +185,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Algorithm algorithmRSA = Algorithm.RSA256(null, privKey); String body = drsConfigString; - String jwtString = createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), body, 5); + String jwtString = createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), body, jwtTimeout); 
logger.info("JWT: " + jwtString); ingestPost.setHeader("Authorization", "Bearer " + jwtString); From c9ff44b09130222a9f60c203d199b06f21f01ed2 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 9 May 2022 12:25:43 +0200 Subject: [PATCH 054/608] license sorting documentation --- doc/release-notes/8671-sorting-licenses.md | 3 +++ doc/sphinx-guides/source/api/native-api.rst | 7 ++++++ .../source/installation/config.rst | 24 +++++++++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 doc/release-notes/8671-sorting-licenses.md diff --git a/doc/release-notes/8671-sorting-licenses.md b/doc/release-notes/8671-sorting-licenses.md new file mode 100644 index 00000000000..34ad697d5a7 --- /dev/null +++ b/doc/release-notes/8671-sorting-licenses.md @@ -0,0 +1,3 @@ +## License sorting + +Licenses as shown in the dropdown in UI can be now sorted by the superusers. See [Configuring Licenses](https://guides.dataverse.org/en/5.10/installation/config.html#configuring-licenses) section of the Installation Guide for reference. \ No newline at end of file diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 5c56166dd6a..cb387dbbef2 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -3806,3 +3806,10 @@ Superusers can delete a license that is not in use by the license ``$ID``: .. code-block:: bash curl -X DELETE -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/licenses/$ID + +Superusers can change the sorting order of a license specified by the license ``$ID``: + +.. code-block:: bash + + export SORT_ORDER=100 + curl -X PUT -H 'Content-Type: application/json' -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/licenses/$ID/:sortOrder/$SORT_ORDER \ No newline at end of file diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 55d96335a68..d0a7cff1ea3 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -997,6 +997,30 @@ Disabling Custom Dataset Terms See :ref:`:AllowCustomTermsOfUse` for how to disable the "Custom Dataset Terms" option. +.. _ChangeLicenseSortOrder: + +Sorting licenses +---------------- + +The default order of licenses in the dropdown in the user interface is as follows: + +* The default license is shown first +* Followed by the remaining installed licenses in the order of installation +* The custom license is at the end + +Only the order of the installed licenses can be changed with the API calls. The default license always remains first and the custom license last. + +The order of licenses can be changed by setting the ``sortOrder`` property of a license. For the purpose of making sorting easier and to allow grouping of the licenses, ``sortOrder`` property does not have to be unique. Licenses with the same ``sortOrder`` are sorted by their name alfabetically. Nevertheless, you can set a unique ``sortOrder`` for every license in order to sort them fully manually. + +The ``sortOrder`` is an whole number and is used to sort licenses in ascending fashion. All licenses must have a sort order and initially it is set to installation order (``id`` property). + +Changing the sorting order of a license specified by the license ``$ID`` is done by superusers using the following API call: + +.. 
code-block:: bash + + export SORT_ORDER=100 + curl -X PUT -H 'Content-Type: application/json' -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/licenses/$ID/:sortOrder/$SORT_ORDER + .. _BagIt Export: BagIt Export From 7aeaa72b9583ddbc3e9585f28ef6d0572a81e0ee Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 10 May 2022 16:50:36 +0200 Subject: [PATCH 055/608] renamed flyway script to unique version --- ...-sorting_licenses.sql => V5.10.1.2__8671-sorting_licenses.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.10.1.1__8671-sorting_licenses.sql => V5.10.1.2__8671-sorting_licenses.sql} (100%) diff --git a/src/main/resources/db/migration/V5.10.1.1__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.10.1.2__8671-sorting_licenses.sql similarity index 100% rename from src/main/resources/db/migration/V5.10.1.1__8671-sorting_licenses.sql rename to src/main/resources/db/migration/V5.10.1.2__8671-sorting_licenses.sql From 98a013fea218bde39ad1e5f40b9f33ab35cba04e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 May 2022 13:43:27 -0400 Subject: [PATCH 056/608] add delete archival status method --- .../harvard/iq/dataverse/api/Datasets.java | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index baa9644700e..93884cf3f2b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3360,4 +3360,31 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, } return error(Status.BAD_REQUEST, "Unacceptable status format"); } + + @DELETE + @Produces(MediaType.APPLICATION_JSON) + @Path("/submitDatasetVersionToArchive/{id}/{version}/status") + public Response deleteDatasetVersionToArchiveStatus(@PathParam("id") String dsid, + @PathParam("version") String versionNumber) { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + Dataset ds = findDatasetOrDie(dsid); + + DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); + if (dv == null) { + return error(Status.NOT_FOUND, "Dataset version not found"); + } + dv.setArchivalCopyLocation(null); + dv = datasetversionService.merge(dv); + + return ok("Status deleted"); + + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } } From 6255418d9cd93f57c4510d5b6fbba83bdb5b57e2 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 May 2022 13:44:02 -0400 Subject: [PATCH 057/608] add isSingleVersion option false by default, can be true for DRS Archiver --- .../impl/AbstractSubmitToArchiveCommand.java | 8 +++++++- .../command/impl/DRSSubmitToArchiveCommand.java | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 99ef4b811cd..321d51a0595 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -98,5 +98,11 @@ String getDataCiteXml(DatasetVersion dv) { public static boolean isArchivable(Dataset dataset, SettingsWrapper settingsWrapper) { 
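// Illustrative note, not part of the patch: the isSingleVersion() methods added below are
// static, so DRSSubmitToArchiveCommand shadows rather than overrides the default, and a
// caller holding only the configured class name has to resolve the right implementation
// reflectively (ArchiverUtil.onlySingleVersionArchiving in a later patch does exactly this).
// In outline:
//
//     Method m = archiverClass.getMethod("isSingleVersion", SettingsServiceBean.class);
//     boolean singleVersionOnly = (Boolean) m.invoke(null, settingsService); // null receiver: static method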
return true; - } + } + + public static boolean isSingleVersion(SettingsWrapper settingsWrapper) { + return false; + } + + } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 391ecb7b1d6..89b589f8a39 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -322,4 +322,21 @@ public static boolean isArchivable(Dataset d, SettingsWrapper sw) { } return false; } + + public static boolean isSingleVersion(SettingsWrapper sw) { + JsonObject drsConfigObject = null; + + try { + String config = sw.get(DRS_CONFIG, null); + if (config != null) { + drsConfigObject = JsonUtil.getJsonObject(config); + } + } catch (Exception e) { + logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); + } + if (drsConfigObject != null) { + return drsConfigObject.getBoolean("single_version", false); + } + return false; + } } From 43382da166930af1a0b80f56d2d0216e6ecb7589 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 May 2022 15:06:36 -0400 Subject: [PATCH 058/608] adjust so can call from api as well --- .../impl/AbstractSubmitToArchiveCommand.java | 6 ++++++ .../command/impl/DRSSubmitToArchiveCommand.java | 13 +++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 321d51a0595..9124cec751e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -100,9 +100,15 @@ public static boolean isArchivable(Dataset dataset, SettingsWrapper settingsWrap return true; } + //Check if the chosen archiver imposes single-version-only archiving - in a View context public static boolean isSingleVersion(SettingsWrapper settingsWrapper) { return false; } + + //Check if the chosen archiver imposes single-version-only archiving - in the API + public static boolean isSingleVersion(SettingsServiceBean settingsService) { + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 89b589f8a39..8f3a179a2d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; @@ -324,10 +325,18 @@ public static boolean isArchivable(Dataset d, SettingsWrapper sw) { } public static boolean isSingleVersion(SettingsWrapper sw) { - JsonObject drsConfigObject = null; + String config = sw.get(DRS_CONFIG, null); + return isSingleVersion(config); + } + 
public static boolean isSingleVersion(SettingsServiceBean ss) { + String config = ss.get(DRS_CONFIG, null); + return isSingleVersion(config); + } + + private static boolean isSingleVersion(String config) { + JsonObject drsConfigObject = null; try { - String config = sw.get(DRS_CONFIG, null); if (config != null) { drsConfigObject = JsonUtil.getJsonObject(config); } From 0b83efe8f07245d86dbd41411be3cba026a63418 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 May 2022 15:21:05 -0400 Subject: [PATCH 059/608] API changes for single version semantics --- .../edu/harvard/iq/dataverse/api/Admin.java | 7 +++++ .../harvard/iq/dataverse/api/Datasets.java | 28 ++++++++++++++++++- .../iq/dataverse/util/ArchiverUtil.java | 24 ++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 678d563d7bb..b962136c557 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1752,6 +1752,13 @@ public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); if (cmd != null) { + if(ArchiverUtil.onlySingleVersionArchiving(cmd.getClass(), settingsService)) { + for (DatasetVersion version : ds.getVersions()) { + if ((dv != version) && version.getArchivalCopyLocation() != null) { + return error(Status.CONFLICT, "Dataset already archived."); + } + } + } new Thread(new Runnable() { public void run() { try { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 93884cf3f2b..7a69a720971 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -100,6 +100,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.StringReader; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.net.URI; import java.sql.Timestamp; import java.text.MessageFormat; @@ -3300,7 +3302,7 @@ public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, return error(Response.Status.FORBIDDEN, "Superusers only."); } Dataset ds = findDatasetOrDie(dsid); - + DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); if (dv.getArchivalCopyLocation() == null) { return error(Status.NO_CONTENT, "This dataset version has not been archived"); @@ -3345,6 +3347,14 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, if(dv==null) { return error(Status.NOT_FOUND, "Dataset version not found"); } + if (isSingleVersionArchiving()) { + for (DatasetVersion version : ds.getVersions()) { + if ((dv != version) && version.getArchivalCopyLocation() != null) { + return error(Status.CONFLICT, "Dataset already archived."); + } + } + } + dv.setArchivalCopyLocation(JsonUtil.prettyPrint(update)); dv = datasetversionService.merge(dv); logger.info("location now: " + dv.getArchivalCopyLocation()); @@ -3387,4 +3397,20 @@ public Response deleteDatasetVersionToArchiveStatus(@PathParam("id") String dsid return wr.getResponse(); } } + + private boolean isSingleVersionArchiving() { + String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); + if (className != null) { + Class clazz; + try { 
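// Illustrative note, not part of the patch: Class.forName(...).asSubclass(...) loads the
// configured class and type-checks it in one step; asSubclass() throws ClassCastException
// when :ArchiverClassName names a class that is not an archiver, which is why the two
// catch blocks below report ClassNotFoundException and ClassCastException separately.
// With the generic type written out, the call is:
//
//     Class<? extends AbstractSubmitToArchiveCommand> clazz =
//             Class.forName(className).asSubclass(AbstractSubmitToArchiveCommand.class);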
+ clazz = Class.forName(className).asSubclass(AbstractSubmitToArchiveCommand.class); + return ArchiverUtil.onlySingleVersionArchiving(clazz, settingsService); + } catch (ClassNotFoundException e) { + logger.warning(":ArchiverClassName does not refer to a known Archiver"); + } catch (ClassCastException cce) { + logger.warning(":ArchiverClassName does not refer to an Archiver class"); + } + } + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java index fc97f972f5c..31466470674 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java @@ -1,11 +1,15 @@ package edu.harvard.iq.dataverse.util; import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.logging.Logger; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.impl.AbstractSubmitToArchiveCommand; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; /** * Simple class to reflectively get an instance of the desired class for @@ -35,4 +39,24 @@ public static AbstractSubmitToArchiveCommand createSubmitToArchiveCommand(String } return null; } + + public static boolean onlySingleVersionArchiving(Class clazz, SettingsServiceBean settingsService) { + Method m; + try { + m = clazz.getMethod("isSingleVersion", SettingsServiceBean.class); + Object[] params = { settingsService }; + return (Boolean) m.invoke(null, params); + } catch (NoSuchMethodException e) { + e.printStackTrace(); + } catch (SecurityException e) { + e.printStackTrace(); + } catch (IllegalAccessException e) { + e.printStackTrace(); + } catch (IllegalArgumentException e) { + e.printStackTrace(); + } catch (InvocationTargetException e) { + e.printStackTrace(); + } + return (AbstractSubmitToArchiveCommand.isSingleVersion(settingsService)); + } } From 3f4043ebdefae5a7f2df14727153dd8dba65e94e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 May 2022 15:21:26 -0400 Subject: [PATCH 060/608] UI changes for single version and new bundle strings --- .../edu/harvard/iq/dataverse/DatasetPage.java | 37 +++++++++++++++++++ src/main/java/propertyFiles/Bundle.properties | 6 +++ src/main/webapp/dataset-versions.xhtml | 16 ++++---- 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index cff0f34b816..5caf0427f38 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -5567,6 +5567,43 @@ public boolean isArchivable() { } return false; } + + public boolean isVersionArchivable() { + // If this dataset isn't in an archivable collection retuyrn false + if (isArchivable()) { + boolean checkForArchivalCopy = false; + // Otherwise, we need to know if the archiver is single-version-only + // If it is, we have to check for an existing archived version to answer the + // question + String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); + if (className != null) { + try { + Class clazz = Class.forName(className); + Method m = clazz.getMethod("isSingleVersion", SettingsWrapper.class); + Object[] params = { settingsWrapper }; + 
checkForArchivalCopy = (Boolean) m.invoke(null, params); + } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException + | InvocationTargetException | NoSuchMethodException | SecurityException e) { + logger.warning("Failed to call is Archivable on configured archiver class: " + className); + e.printStackTrace(); + } + if (checkForArchivalCopy) { + // If we have to check (single version archiving), we can't allow archiving if + // one version is already archived (or attempted - any non-null status) + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.getArchivalCopyLocation() != null) { + return false; + } + } + } + // If we allow multiple versions or didn't find one that has had archiving run + // on it, we can archive, so return true + return true; + } + } + //not in an archivable collection + return false; + } private static Date getFileDateToCompare(FileMetadata fileMetadata) { DataFile datafile = fileMetadata.getDataFile(); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 9fa0fc71f3f..7bfcfbcbfa6 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1835,6 +1835,12 @@ file.dataFilesTab.versions.headers.summary=Summary file.dataFilesTab.versions.headers.contributors=Contributors file.dataFilesTab.versions.headers.contributors.withheld=Contributor name(s) withheld file.dataFilesTab.versions.headers.published=Published on +file.dataFilesTab.versions.headers.archived=Archival Status +file.dataFilesTab.versions.headers.archived.success=Archived +file.dataFilesTab.versions.headers.archived.pending=Pending +file.dataFilesTab.versions.headers.archived.failure=Failed +file.dataFilesTab.versions.headers.archived.notarchived=Not Archived +file.dataFilesTab.versions.headers.archived.submit=Submit file.dataFilesTab.versions.viewDiffBtn=View Differences file.dataFilesTab.versions.citationMetadata=Citation Metadata: file.dataFilesTab.versions.added=Added diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 37f7906c640..f4c80b43efe 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -148,20 +148,22 @@ - + - + - + - - - + + - + + + From 2f087c4287ec6e8a3b2b83336ec69f6a89eff9ef Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 13 May 2022 11:38:51 -0400 Subject: [PATCH 061/608] update display in unarchivable collections --- .../edu/harvard/iq/dataverse/DatasetPage.java | 17 +++++++++++------ src/main/webapp/dataset-versions.xhtml | 4 ++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 5caf0427f38..2f98e43dd93 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -5569,7 +5569,7 @@ public boolean isArchivable() { } public boolean isVersionArchivable() { - // If this dataset isn't in an archivable collection retuyrn false + // If this dataset isn't in an archivable collection return false if (isArchivable()) { boolean checkForArchivalCopy = false; // Otherwise, we need to know if the archiver is single-version-only @@ -5590,11 +5590,7 @@ public boolean isVersionArchivable() { if (checkForArchivalCopy) { // If we have to check (single version archiving), we can't allow archiving if // one version is already archived (or attempted - any non-null 
status) - for (DatasetVersion dv : dataset.getVersions()) { - if (dv.getArchivalCopyLocation() != null) { - return false; - } - } + return !isSomeVersionArchived(); } // If we allow multiple versions or didn't find one that has had archiving run // on it, we can archive, so return true @@ -5604,6 +5600,15 @@ public boolean isVersionArchivable() { //not in an archivable collection return false; } + + public boolean isSomeVersionArchived() { + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.getArchivalCopyLocation() != null) { + return true; + } + } + return false; + } private static Date getFileDateToCompare(FileMetadata fileMetadata) { DataFile datafile = fileMetadata.getDataFile(); diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index f4c80b43efe..6f144ec46d9 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -148,7 +148,7 @@ - + @@ -162,7 +162,7 @@ action="#{DatasetPage.archiveVersion(versionTab.id)}"> - + From baec83e6b8eb2bbfdce64e384704f2a89044217b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 13 May 2022 11:46:34 -0400 Subject: [PATCH 062/608] single version for command/workflow --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 8f3a179a2d7..b2a99ce6d44 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -99,6 +99,15 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String packageId = spaceName + ".v" + dv.getFriendlyVersionNumber(); if (alias != null) { + if (drsConfigObject.getBoolean("single_version", false)) { + for (DatasetVersion version : dataset.getVersions()) { + if (version.getArchivalCopyLocation() != null) { + return new Failure("DRS Archiver fail: version " + version.getFriendlyVersionNumber() + + " already archived."); + } + } + } + JsonObject collectionConfig = adminMetadata.getJsonObject(COLLECTIONS).getJsonObject(alias); WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); From 4212a2119ae0d1e5e9084ca9e0803bc039be2a97 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 13 May 2022 11:53:54 -0400 Subject: [PATCH 063/608] typo --- src/main/webapp/dataset-versions.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 6f144ec46d9..4d04546133e 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -162,7 +162,7 @@ action="#{DatasetPage.archiveVersion(versionTab.id)}"> - + From 7f1561d239031beba167c024d432a88ce7813e33 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 16 May 2022 12:52:01 +0200 Subject: [PATCH 064/608] licenses are now sorted first by sortOrder then by ID --- doc/sphinx-guides/source/installation/config.rst | 4 ++-- src/main/java/edu/harvard/iq/dataverse/license/License.java | 6 +++--- .../db/migration/V5.10.1.2__8671-sorting_licenses.sql | 4 ---- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 
b99ee2bca83..8bc1e063075 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1051,9 +1051,9 @@ The default order of licenses in the dropdown in the user interface is as follow Only the order of the installed licenses can be changed with the API calls. The default license always remains first and the custom license last. -The order of licenses can be changed by setting the ``sortOrder`` property of a license. For the purpose of making sorting easier and to allow grouping of the licenses, the ``sortOrder`` property does not have to be unique. Licenses with the same ``sortOrder`` are sorted by their name alphabetically. Nevertheless, you can set a unique ``sortOrder`` for every license in order to sort them fully manually. +The order of licenses can be changed by setting the ``sortOrder`` property of a license. For the purpose of making sorting easier and to allow grouping of the licenses, the ``sortOrder`` property does not have to be unique. Licenses with the same ``sortOrder`` are sorted by their ID, i.e., first by the sortOrder, then by the ID. Nevertheless, you can set a unique ``sortOrder`` for every license in order to sort them fully manually. -The ``sortOrder`` is a whole number and is used to sort licenses in ascending fashion. All licenses must have a sort order and initially it is set to the installation order (the ``id`` property). +The ``sortOrder`` is a whole number and is used to sort licenses in ascending fashion. Changing the sorting order of a license specified by the license ``$ID`` is done by superusers using the following API call: diff --git a/src/main/java/edu/harvard/iq/dataverse/license/License.java b/src/main/java/edu/harvard/iq/dataverse/license/License.java index 4f99470d7b4..0c8465e88e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/license/License.java +++ b/src/main/java/edu/harvard/iq/dataverse/license/License.java @@ -23,9 +23,9 @@ */ @NamedQueries({ @NamedQuery( name="License.findAll", - query="SELECT l FROM License l ORDER BY (case when l.isDefault then 0 else 1 end), l.sortOrder, l.name asc"), + query="SELECT l FROM License l ORDER BY (case when l.isDefault then 0 else 1 end), l.sortOrder, l.id asc"), @NamedQuery( name="License.findAllActive", - query="SELECT l FROM License l WHERE l.active='true' ORDER BY (case when l.isDefault then 0 else 1 end), l.sortOrder, l.name asc"), + query="SELECT l FROM License l WHERE l.active='true' ORDER BY (case when l.isDefault then 0 else 1 end), l.sortOrder, l.id asc"), @NamedQuery( name="License.findById", query = "SELECT l FROM License l WHERE l.id=:id"), @NamedQuery( name="License.findDefault", @@ -76,7 +76,7 @@ public class License { @Column(nullable = false) private boolean isDefault; - @Column(nullable = false) + @Column(nullable = true) private Long sortOrder; @OneToMany(mappedBy="license") diff --git a/src/main/resources/db/migration/V5.10.1.2__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.10.1.2__8671-sorting_licenses.sql index 5bc18e69df0..43631ebd165 100644 --- a/src/main/resources/db/migration/V5.10.1.2__8671-sorting_licenses.sql +++ b/src/main/resources/db/migration/V5.10.1.2__8671-sorting_licenses.sql @@ -1,9 +1,5 @@ ALTER TABLE license ADD COLUMN IF NOT EXISTS sortorder BIGINT; -UPDATE license -SET sortorder = id -WHERE sortorder IS NULL; - CREATE INDEX IF NOT EXISTS license_sortorder_id ON license (sortorder, id); \ No newline at end of file From c3b3cb219f7aac841b1dae1e79059b0b3a477ff4 Mon Sep 17 00:00:00 2001 From: qqmyers Date:
Mon, 16 May 2022 10:47:26 -0400 Subject: [PATCH 065/608] update for #8592 semantic mapping update --- .../internalspi/LDNAnnounceDatasetVersionStep.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java index 7ce65359968..3388e54e5bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java @@ -155,13 +155,13 @@ HttpPost buildAnnouncement(boolean b, WorkflowContext ctxt, JsonObject target) t for (DatasetFieldType cdft : childTypes) { switch (cdft.getName()) { case "publicationURL": - publicationURL = OREMap.getTermFor(dft, cdft); + publicationURL = cdft.getJsonLDTerm(); break; case "publicationIDType": - publicationIDType = OREMap.getTermFor(dft, cdft); + publicationIDType = cdft.getJsonLDTerm(); break; case "publicationIDNumber": - publicationIDNumber = OREMap.getTermFor(dft, cdft); + publicationIDNumber = cdft.getJsonLDTerm(); break; } @@ -188,7 +188,7 @@ HttpPost buildAnnouncement(boolean b, WorkflowContext ctxt, JsonObject target) t default: if (jv != null) { includeLocalContext = true; - coarContext.add(OREMap.getTermFor(dft).getLabel(), jv); + coarContext.add(dft.getJsonLDTerm().getLabel(), jv); } } From 7dd6f2e71fb546e105e7492c35de1737721b3409 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 May 2022 14:56:19 -0400 Subject: [PATCH 066/608] bug - related to sem api change --- src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 84423f60eca..4b31e5cf0a4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -324,7 +324,7 @@ public JsonLDTerm getDescriptionTextTerm() { private JsonLDTerm getTermFor(String fieldTypeName) { //Could call datasetFieldService.findByName(fieldTypeName) - is that faster/preferable?
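// Illustrative sketch (an assumption about the bug being fixed): getDatasetFields() returns
// only top-level fields, so children of compound fields - such as publicationURL inside the
// publication block handled in the previous patch - were never matched, and the term lookup
// fell through. getFlatDatasetFields() walks the flattened field tree instead:
//
//     for (DatasetField dsf : version.getFlatDatasetFields()) { // compound children included
//         if (dsf.getDatasetFieldType().getName().equals(fieldTypeName)) {
//             return dsf.getDatasetFieldType().getJsonLDTerm();
//         }
//     }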
- for (DatasetField dsf : version.getDatasetFields()) { + for (DatasetField dsf : version.getFlatDatasetFields()) { DatasetFieldType dsft = dsf.getDatasetFieldType(); if (dsft.getName().equals(fieldTypeName)) { return dsft.getJsonLDTerm(); From 2104ec63ddd00ea4110de7e3fe568d2492d4b8f4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 May 2022 14:57:21 -0400 Subject: [PATCH 067/608] bug - superadmin no version w/status display of archival status --- src/main/webapp/dataset-versions.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 4d04546133e..ddd305c50f7 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -162,7 +162,7 @@ action="#{DatasetPage.archiveVersion(versionTab.id)}"> - + From dfe70fe23ff8e59dba332692d20a02d6add77205 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 May 2022 14:56:19 -0400 Subject: [PATCH 068/608] bug - related to sem api change --- src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 1bc43dae466..a295f264d66 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -395,7 +395,7 @@ public JsonLDTerm getDescriptionTextTerm() { private JsonLDTerm getTermFor(String fieldTypeName) { //Could call datasetFieldService.findByName(fieldTypeName) - is that faster/preferable? - for (DatasetField dsf : version.getDatasetFields()) { + for (DatasetField dsf : version.getFlatDatasetFields()) { DatasetFieldType dsft = dsf.getDatasetFieldType(); if (dsft.getName().equals(fieldTypeName)) { return dsft.getJsonLDTerm(); From 96bab6a2f6ddf15aedb922c05b88a1e3eb4a66eb Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 May 2022 14:57:21 -0400 Subject: [PATCH 069/608] bug - superadmin no version w/status display of archival status --- src/main/webapp/dataset-versions.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 4d04546133e..ddd305c50f7 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -162,7 +162,7 @@ action="#{DatasetPage.archiveVersion(versionTab.id)}"> - + From f4ba94b8515597f1f785735b0e02bb00771a8ea6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 17 May 2022 09:09:31 -0400 Subject: [PATCH 070/608] fix status single version logic --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 7a69a720971..8b6d75d3629 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3349,7 +3349,7 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, } if (isSingleVersionArchiving()) { for (DatasetVersion version : ds.getVersions()) { - if ((dv != version) && version.getArchivalCopyLocation() != null) { + if ((!dv.equals(version)) && (version.getArchivalCopyLocation() != null)) { return error(Status.CONFLICT, "Dataset already archived."); } } From 631091debff4e2ccc2d17870eef8e48fd08c7145 Mon
Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 26 May 2022 14:46:43 -0400 Subject: [PATCH 071/608] update test --- .../impl/DRSSubmitToArchiveCommandTest.java | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java index 0bfd8ac18f2..64c5956f28f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -1,11 +1,13 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.branding.BrandingUtilTest; -import org.apache.commons.codec.digest.DigestUtils; import org.erdtman.jcs.JsonCanonicalizer; import org.junit.Assert; import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import com.auth0.jwt.JWT; import com.auth0.jwt.algorithms.Algorithm; @@ -16,9 +18,7 @@ import java.security.interfaces.RSAPrivateKey; import java.security.interfaces.RSAPublicKey; import java.security.spec.PKCS8EncodedKeySpec; -import java.time.Instant; import java.util.Base64; -import java.util.Date; /** * @@ -26,6 +26,16 @@ */ public class DRSSubmitToArchiveCommandTest { + + @BeforeAll + private static void setUpAll() { + BrandingUtilTest.setupMocks(); + } + @AfterAll + private static void tearDownAll() { + BrandingUtilTest.tearDownMocks(); + } + @Test public void createJWT() throws CommandException { From 8606475c3696fafedb022260618bd0dddff164ea Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 26 May 2022 14:57:20 -0400 Subject: [PATCH 072/608] hardcode brandname in test --- .../command/impl/DRSSubmitToArchiveCommandTest.java | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java index 64c5956f28f..62135287d18 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -26,16 +26,6 @@ */ public class DRSSubmitToArchiveCommandTest { - - @BeforeAll - private static void setUpAll() { - BrandingUtilTest.setupMocks(); - } - @AfterAll - private static void tearDownAll() { - BrandingUtilTest.tearDownMocks(); - } - @Test public void createJWT() throws CommandException { @@ -121,7 +111,7 @@ public void createJWT() throws CommandException { System.out.println("Canonical form:"+ canonicalBody); Algorithm algorithmRSA = Algorithm.RSA256(null, privKey); - String token1 = DRSSubmitToArchiveCommand.createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), fakeBody, 5); + String token1 = DRSSubmitToArchiveCommand.createJWTString(algorithmRSA, "InstallationBrandName", fakeBody, 5); System.out.println("JWT: " + token1); DecodedJWT jwt = JWT.decode(token1); From 46212be671fbc42a6d56f4069baafac9a29521ee Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 31 May 2022 12:51:53 +0200 Subject: [PATCH 073/608] updated documentation and example with mandatory sort order in licenses --- doc/release-notes/5.10-release-notes.md | 2 +- 
doc/sphinx-guides/source/_static/api/add-license.json | 3 ++- doc/sphinx-guides/source/api/native-api.rst | 2 +- doc/sphinx-guides/source/api/sword.rst | 2 +- doc/sphinx-guides/source/installation/config.rst | 3 +-- scripts/api/data/licenses/licenseCC-BY-4.0.json | 3 ++- scripts/api/data/licenses/licenseCC-BY-NC-4.0.json | 3 ++- scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json | 3 ++- scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json | 3 ++- scripts/api/data/licenses/licenseCC-BY-ND-4.0.json | 3 ++- scripts/api/data/licenses/licenseCC-BY-SA-4.0.json | 3 ++- scripts/api/data/licenses/licenseCC0-1.0.json | 3 ++- ...rting_licenses.sql => V5.10.1.3__8671-sorting_licenses.sql} | 0 13 files changed, 20 insertions(+), 13 deletions(-) rename src/main/resources/db/migration/{V5.10.1.2__8671-sorting_licenses.sql => V5.10.1.3__8671-sorting_licenses.sql} (100%) diff --git a/doc/release-notes/5.10-release-notes.md b/doc/release-notes/5.10-release-notes.md index 0da42a7b527..4e9e5e0ef94 100644 --- a/doc/release-notes/5.10-release-notes.md +++ b/doc/release-notes/5.10-release-notes.md @@ -6,7 +6,7 @@ This release brings new features, enhancements, and bug fixes to the Dataverse S ### Multiple License Support -Users can now select from a set of configured licenses in addition to or instead of the previous Creative Commons CC0 choice or provide custom terms of use (if configured) for their datasets. Administrators can configure their Dataverse instance via API to allow any desired license as a choice and can enable or disable the option to allow custom terms. Administrators can also mark licenses as "inactive" to disallow future use while keeping that license for existing datasets. For upgrades, only the CC0 license will be preinstalled. New installations will have both CC0 and CC BY preinstalled. The [Configuring Licenses](https://guides.dataverse.org/en/5.10/installation/config.html#configuring-licenses) section of the Installation Guide shows how to add or remove licenses. +Users can now select from a set of configured licenses in addition to or instead of the previous Creative Commons CC0 choice or provide custom terms of use (if configured) for their datasets. Administrators can configure their Dataverse instance via API to allow any desired license as a choice and can enable or disable the option to allow custom terms. Administrators can also mark licenses as "inactive" to disallow future use while keeping that license for existing datasets. For upgrades, only the CC0 license will be preinstalled. New installations will have both CC0 1.0 preinstalled. The [Configuring Licenses](https://guides.dataverse.org/en/5.10/installation/config.html#configuring-licenses) section of the Installation Guide shows how to add or remove licenses. **Note: Datasets in existing installations will automatically be updated to conform to new requirements that custom terms cannot be used with a standard license and that custom terms cannot be empty. Administrators may wish to manually update datasets with these conditions if they do not like the automated migration choices. 
See the "Notes for Dataverse Installation Administrators" section below for details.** diff --git a/doc/sphinx-guides/source/_static/api/add-license.json b/doc/sphinx-guides/source/_static/api/add-license.json index 969d6d58dab..a9d5dd34093 100644 --- a/doc/sphinx-guides/source/_static/api/add-license.json +++ b/doc/sphinx-guides/source/_static/api/add-license.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by/4.0", "shortDescription": "Creative Commons Attribution 4.0 International License.", "iconUrl": "https://i.creativecommons.org/l/by/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 2 } diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 249d1812507..da82be9ad7b 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -3858,7 +3858,7 @@ View the details of the standard license with the database ID specified in ``$ID curl $SERVER_URL/api/licenses/$ID -Superusers can add a new license by posting a JSON file adapted from this example :download:`add-license.json <../_static/api/add-license.json>`. The ``name`` and ``uri`` of the new license must be unique. If you are interested in adding a Creative Commons license, you are encouraged to use the JSON files under :ref:`adding-creative-commons-licenses`: +Superusers can add a new license by posting a JSON file adapted from this example :download:`add-license.json <../_static/api/add-license.json>`. The ``name`` and ``uri`` of the new license must be unique. The sort order field is mandatory. If you are interested in adding a Creative Commons license, you are encouraged to use the JSON files under :ref:`adding-creative-commons-licenses`: .. code-block:: bash diff --git a/doc/sphinx-guides/source/api/sword.rst b/doc/sphinx-guides/source/api/sword.rst index 11b43e98774..8041dff4891 100755 --- a/doc/sphinx-guides/source/api/sword.rst +++ b/doc/sphinx-guides/source/api/sword.rst @@ -82,7 +82,7 @@ New features as of v1.1 - "Contributor" can now be populated and the "Type" (Editor, Funder, Researcher, etc.) can be specified with an XML attribute. For example: ``CaffeineForAll`` -- "License" can now be set with ``dcterms:license`` and the possible values determined by the installation ("CC0 1.0" and "CC BY 4.0" by default). "License" interacts with "Terms of Use" (``dcterms:rights``) in that if you include ``dcterms:rights`` in the XML and don't include ``dcterms:license``, the license will be "Custom Dataset Terms" and "Terms of Use" will be populated. If you don't include ``dcterms:rights``, the default license will be used.
It is invalid to specify a license and also include ``dcterms:rights``; an error will be returned. For backwards compatibility, ``dcterms:rights`` is allowed to be blank (i.e. ````) but blank values will not be persisted to the database and the license will be set to "Custom Dataset Terms". Note that if admins of an installation have disabled "Custom Dataset Terms" you will get an error if you try to pass ``dcterms:rights``. +- "License" can now be set with ``dcterms:license`` and the possible values determined by the installation ("CC0 1.0" by default). "License" interacts with "Terms of Use" (``dcterms:rights``) in that if you include ``dcterms:rights`` in the XML and don't include ``dcterms:license``, the license will be "Custom Dataset Terms" and "Terms of Use" will be populated. If you don't include ``dcterms:rights``, the default license will be used. It is invalid to specify a license and also include ``dcterms:rights``; an error will be returned. For backwards compatibility, ``dcterms:rights`` is allowed to be blank (i.e. ````) but blank values will not be persisted to the database and the license will be set to "Custom Dataset Terms". Note that if admins of an installation have disabled "Custom Dataset Terms" you will get an error if you try to pass ``dcterms:rights``. - "Contact E-mail" is automatically populated from dataset owner's email. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 99ed622c911..61e13ad10c8 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -991,7 +991,6 @@ Configuring Licenses Out of the box, users select from the following licenses or terms: - CC0 1.0 (default) -- CC BY 4.0 - Custom Dataset Terms You have a lot of control over which licenses and terms are available. You can remove licenses and add new ones. You can decide which license is the default. You can remove "Custom Dataset Terms" as an option. You can remove all licenses and make "Custom Dataset Terms" the only option. @@ -1015,7 +1015,7 @@ Licenses are added with curl using JSON file as explained in the API Guide under Adding Creative Commons Licenses ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -JSON files for `Creative Commons licenses `_ are provided below. Note that a new installation of Dataverse already includes CC0 and CC BY. +JSON files for `Creative Commons licenses `_ are provided below. Note that a new installation of Dataverse already includes CC0. - :download:`licenseCC0-1.0.json <../../../../scripts/api/data/licenses/licenseCC0-1.0.json>` - :download:`licenseCC-BY-4.0.json <../../../../scripts/api/data/licenses/licenseCC-BY-4.0.json>` diff --git a/scripts/api/data/licenses/licenseCC-BY-4.0.json b/scripts/api/data/licenses/licenseCC-BY-4.0.json index 5596e65e947..59201b8d08e 100644 --- a/scripts/api/data/licenses/licenseCC-BY-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by/4.0", "shortDescription": "Creative Commons Attribution 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 2 } diff --git a/scripts/api/data/licenses/licenseCC-BY-NC-4.0.json b/scripts/api/data/licenses/licenseCC-BY-NC-4.0.json index 8154c9ec5df..c19087664db 100644 --- a/scripts/api/data/licenses/licenseCC-BY-NC-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-NC-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-nc/4.0", "shortDescription": "Creative Commons Attribution-NonCommercial 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-nc/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 4 } diff --git a/scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json b/scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json index 247ce52f6ea..2e374917d28 100644 --- a/scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-NC-ND-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-nc-nd/4.0", "shortDescription": "Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-nc-nd/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 7 } diff --git
a/scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json b/scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json index e9726fb6374..5018884f65e 100644 --- a/scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-NC-SA-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-nc-sa/4.0", "shortDescription": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-nc-sa/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 3 } diff --git a/scripts/api/data/licenses/licenseCC-BY-ND-4.0.json b/scripts/api/data/licenses/licenseCC-BY-ND-4.0.json index 7ae81bacc10..317d459a7ae 100644 --- a/scripts/api/data/licenses/licenseCC-BY-ND-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-ND-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-nd/4.0", "shortDescription": "Creative Commons Attribution-NoDerivatives 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-nd/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 6 } diff --git a/scripts/api/data/licenses/licenseCC-BY-SA-4.0.json b/scripts/api/data/licenses/licenseCC-BY-SA-4.0.json index e9a02880885..0d28c9423aa 100644 --- a/scripts/api/data/licenses/licenseCC-BY-SA-4.0.json +++ b/scripts/api/data/licenses/licenseCC-BY-SA-4.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/licenses/by-sa/4.0", "shortDescription": "Creative Commons Attribution-ShareAlike 4.0 International License.", "iconUrl": "https://licensebuttons.net/l/by-sa/4.0/88x31.png", - "active": true + "active": true, + "sortOrder": 5 } diff --git a/scripts/api/data/licenses/licenseCC0-1.0.json b/scripts/api/data/licenses/licenseCC0-1.0.json index 396ba133327..216260a5de8 100644 --- a/scripts/api/data/licenses/licenseCC0-1.0.json +++ b/scripts/api/data/licenses/licenseCC0-1.0.json @@ -3,5 +3,6 @@ "uri": "http://creativecommons.org/publicdomain/zero/1.0", "shortDescription": "Creative Commons CC0 1.0 Universal Public Domain Dedication.", "iconUrl": "https://licensebuttons.net/p/zero/1.0/88x31.png", - "active": true + "active": true, + "sortOrder": 1 } diff --git a/src/main/resources/db/migration/V5.10.1.2__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.10.1.3__8671-sorting_licenses.sql similarity index 100% rename from src/main/resources/db/migration/V5.10.1.2__8671-sorting_licenses.sql rename to src/main/resources/db/migration/V5.10.1.3__8671-sorting_licenses.sql From adc8c18e8481426a5614efbccdc94e3be5d9c051 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 2 Jun 2022 17:41:36 +0200 Subject: [PATCH 074/608] revert of removing cc by from documentation --- doc/release-notes/5.10-release-notes.md | 2 +- doc/sphinx-guides/source/api/sword.rst | 2 +- doc/sphinx-guides/source/installation/config.rst | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/release-notes/5.10-release-notes.md b/doc/release-notes/5.10-release-notes.md index 4e9e5e0ef94..0da42a7b527 100644 --- a/doc/release-notes/5.10-release-notes.md +++ b/doc/release-notes/5.10-release-notes.md @@ -6,7 +6,7 @@ This release brings new features, enhancements, and bug fixes to the Dataverse S ### Multiple License Support -Users can now select from a set of configured licenses in addition to or instead of the previous Creative Commons CC0 choice or provide custom terms of use (if configured) for their datasets. 
Administrators can configure their Dataverse instance via API to allow any desired license as a choice and can enable or disable the option to allow custom terms. Administrators can also mark licenses as "inactive" to disallow future use while keeping that license for existing datasets. For upgrades, only the CC0 license will be preinstalled. New installations will have both CC0 1.0 preinstalled. The [Configuring Licenses](https://guides.dataverse.org/en/5.10/installation/config.html#configuring-licenses) section of the Installation Guide shows how to add or remove licenses. +Users can now select from a set of configured licenses in addition to or instead of the previous Creative Commons CC0 choice or provide custom terms of use (if configured) for their datasets. Administrators can configure their Dataverse instance via API to allow any desired license as a choice and can enable or disable the option to allow custom terms. Administrators can also mark licenses as "inactive" to disallow future use while keeping that license for existing datasets. For upgrades, only the CC0 license will be preinstalled. New installations will have both CC0 and CC BY preinstalled. The [Configuring Licenses](https://guides.dataverse.org/en/5.10/installation/config.html#configuring-licenses) section of the Installation Guide shows how to add or remove licenses. **Note: Datasets in existing installations will automatically be updated to conform to new requirements that custom terms cannot be used with a standard license and that custom terms cannot be empty. Administrators may wish to manually update datasets with these conditions if they do not like the automated migration choices. See the "Notes for Dataverse Installation Administrators" section below for details.** diff --git a/doc/sphinx-guides/source/api/sword.rst b/doc/sphinx-guides/source/api/sword.rst index 8041dff4891..11b43e98774 100755 --- a/doc/sphinx-guides/source/api/sword.rst +++ b/doc/sphinx-guides/source/api/sword.rst @@ -82,7 +82,7 @@ New features as of v1.1 - "Contributor" can now be populated and the "Type" (Editor, Funder, Researcher, etc.) can be specified with an XML attribute. For example: ``CaffeineForAll`` -- "License" can now be set with ``dcterms:license`` and the possible values determined by the installation ("CC0 1.0" by default). "License" interacts with "Terms of Use" (``dcterms:rights``) in that if you include ``dcterms:rights`` in the XML and don't include ``dcterms:license``, the license will be "Custom Dataset Terms" and "Terms of Use" will be populated. If you don't include ``dcterms:rights``, the default license will be used. It is invalid to specify a license and also include ``dcterms:rights``; an error will be returned. For backwards compatibility, ``dcterms:rights`` is allowed to be blank (i.e. ````) but blank values will not be persisted to the database and the license will be set to "Custom Dataset Terms". Note that if admins of an installation have disabled "Custom Dataset Terms" you will get an error if you try to pass ``dcterms:rights``. +- "License" can now be set with ``dcterms:license`` and the possible values determined by the installation ("CC0 1.0" and "CC BY 4.0" by default). "License" interacts with "Terms of Use" (``dcterms:rights``) in that if you include ``dcterms:rights`` in the XML and don't include ``dcterms:license``, the license will be "Custom Dataset Terms" and "Terms of Use" will be populated. If you don't include ``dcterms:rights``, the default license will be used. 
It is invalid to specify a license and also include ``dcterms:rights``; an error will be returned. For backwards compatibility, ``dcterms:rights`` is allowed to be blank (i.e. ````) but blank values will not be persisted to the database and the license will be set to "Custom Dataset Terms". Note that if admins of an installation have disabled "Custom Dataset Terms" you will get an error if you try to pass ``dcterms:rights``. - "Contact E-mail" is automatically populated from dataset owner's email. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 61e13ad10c8..99ed622c911 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -991,6 +991,7 @@ Configuring Licenses Out of the box, users select from the following licenses or terms: - CC0 1.0 (default) +- CC BY 4.0 - Custom Dataset Terms You have a lot of control over which licenses and terms are available. You can remove licenses and add new ones. You can decide which license is the default. You can remove "Custom Dataset Terms" as an option. You can remove all licenses and make "Custom Dataset Terms" the only option. @@ -1014,7 +1015,7 @@ Licenses are added with curl using JSON file as explained in the API Guide under Adding Creative Commons Licenses ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -JSON files for `Creative Commons licenses `_ are provided below. Note that a new installation of Dataverse already includes CC0. +JSON files for `Creative Commons licenses `_ are provided below. Note that a new installation of Dataverse already includes CC0 and CC BY. - :download:`licenseCC0-1.0.json <../../../../scripts/api/data/licenses/licenseCC0-1.0.json>` - :download:`licenseCC-BY-4.0.json <../../../../scripts/api/data/licenses/licenseCC-BY-4.0.json>` From 0c515209572567a12ec3bc49fa5c389366bf14fe Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 3 Jun 2022 11:48:27 -0400 Subject: [PATCH 075/608] restore bagger improvements --- .../iq/dataverse/util/bagit/BagGenerator.java | 102 +++++++++++------- 1 file changed, 64 insertions(+), 38 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index a6ee60198c3..27bf96c3e71 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -47,6 +47,7 @@ import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.text.WordUtils; import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -58,7 +59,7 @@ import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.TrustSelfSignedStrategy; import org.apache.http.ssl.SSLContextBuilder; - +import org.apache.http.util.EntityUtils; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; @@ -89,7 +90,8 @@ public class BagGenerator { private int timeout = 60; private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000).build(); + .setConnectionRequestTimeout(timeout *
1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).build(); protected CloseableHttpClient client; private PoolingHttpClientConnectionManager cm = null; @@ -278,7 +280,8 @@ public boolean generateBag(OutputStream outputStream) throws Exception { } createFileFromString(manifestName, sha1StringBuffer.toString()); } else { - logger.warning("No Hash values sent - Bag File does not meet BagIT specification requirement"); + logger.warning("No Hash values (no files?) sending empty manifest to nominally comply with BagIT specification requirement"); + createFileFromString("manifest-md5.txt", ""); } // bagit.txt - Required by spec createFileFromString("bagit.txt", "BagIt-Version: 1.0\r\nTag-File-Character-Encoding: UTF-8"); @@ -463,7 +466,6 @@ private void validateBagFile(File bagFile) throws IOException { logger.info("Data Count: " + dataCount); logger.info("Data Size: " + totalDataSize); - //zf.close(); } public static String getValidName(String bagName) { @@ -1003,46 +1005,70 @@ private HttpGet createNewGetRequest(URI url, String returnType) { return request; } - InputStreamSupplier getInputStreamSupplier(final String uri) { + InputStreamSupplier getInputStreamSupplier(final String uriString) { return new InputStreamSupplier() { public InputStream get() { - int tries = 0; - while (tries < 5) { - try { - logger.fine("Get # " + tries + " for " + uri); - HttpGet getMap = createNewGetRequest(new URI(uri), null); - logger.finest("Retrieving " + tries + ": " + uri); - CloseableHttpResponse response; - //Note - if we ever need to pass an HttpClientContext, we need a new one per thread. - response = client.execute(getMap); - if (response.getStatusLine().getStatusCode() == 200) { - logger.finest("Retrieved: " + uri); - return response.getEntity().getContent(); - } - logger.fine("Status: " + response.getStatusLine().getStatusCode()); - tries++; - - } catch (ClientProtocolException e) { - tries += 5; - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (IOException e) { - // Retry if this is a potentially temporary error such - // as a timeout - tries++; - logger.log(Level.WARNING,"Attempt# " + tries + " : Unable to retrieve file: " + uri, e); - if (tries == 5) { - logger.severe("Final attempt failed for " + uri); + try { + URI uri = new URI(uriString); + + int tries = 0; + while (tries < 5) { + + logger.fine("Get # " + tries + " for " + uriString); + HttpGet getFile = createNewGetRequest(uri, null); + logger.finest("Retrieving " + tries + ": " + uriString); + CloseableHttpResponse response = null; + try { + response = client.execute(getFile); + // Note - if we ever need to pass an HttpClientContext, we need a new one per + // thread. 
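// Illustrative summary of the retry policy implemented below, not additional patch content:
// a 200 hands back the entity stream; any 4xx is treated as permanent, so "tries += 5"
// leaves the loop immediately; 5xx responses and IOExceptions count as transient and are
// retried, up to 5 attempts in total. Condensed, with doGet() as a hypothetical helper:
//
//     for (int tries = 0; tries < 5; ) {
//         int status = doGet(uri);          // hypothetical single-attempt GET
//         if (status == 200) { break; }
//         tries += (status < 500) ? 5 : 1;  // 40x: give up; 50x: retry
//     }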
+                        int statusCode = response.getStatusLine().getStatusCode();
+                        if (statusCode == 200) {
+                            logger.finest("Retrieved: " + uri);
+                            return response.getEntity().getContent();
+                        }
+                        logger.warning("Attempt: " + tries + " - Unexpected Status when retrieving " + uriString
+                                + " : " + statusCode);
+                        if (statusCode < 500) {
+                            logger.fine("Will not retry for 40x errors");
+                            tries += 5;
+                        } else {
+                            tries++;
+                        }
+                        // Error handling
+                        if (response != null) {
+                            try {
+                                EntityUtils.consumeQuietly(response.getEntity());
+                                response.close();
+                            } catch (IOException io) {
+                                logger.warning(
+                                        "Exception closing response after status: " + statusCode + " on " + uri);
+                            }
+                        }
+                    } catch (ClientProtocolException e) {
+                        tries += 5;
+                        // TODO Auto-generated catch block
+                        e.printStackTrace();
+                    } catch (IOException e) {
+                        // Retry if this is a potentially temporary error such
+                        // as a timeout
+                        tries++;
+                        logger.log(Level.WARNING, "Attempt# " + tries + " : Unable to retrieve file: " + uriString,
+                                e);
+                        if (tries == 5) {
+                            logger.severe("Final attempt failed for " + uriString);
+                        }
+                        e.printStackTrace();
                     }
-                    e.printStackTrace();
-                } catch (URISyntaxException e) {
-                    tries += 5;
-                    // TODO Auto-generated catch block
-                    e.printStackTrace();
+                }
+
+                } catch (URISyntaxException e) {
+                    // TODO Auto-generated catch block
+                    e.printStackTrace();
                 }
-                logger.severe("Could not read: " + uri);
+                logger.severe("Could not read: " + uriString);
                 return null;
             }
         };

From d0163b5ae8772158b12a4e0185093c580d3a0cee Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Fri, 3 Jun 2022 13:15:52 -0400
Subject: [PATCH 077/608] get non-URL form of PID (now that OREMap uses the
 URL)

---
 .../edu/harvard/iq/dataverse/util/bagit/BagGenerator.java | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java
index 27bf96c3e71..2a3a34507b1 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java
@@ -73,6 +73,7 @@

 import edu.harvard.iq.dataverse.DataFile;
 import edu.harvard.iq.dataverse.DataFile.ChecksumType;
+import edu.harvard.iq.dataverse.GlobalId;
 import edu.harvard.iq.dataverse.util.json.JsonLDTerm;

 public class BagGenerator {
@@ -206,7 +207,9 @@ public boolean generateBag(OutputStream outputStream) throws Exception {
             // The oremapObject is javax.json.JsonObject and we need com.google.gson.JsonObject for the aggregation object
             aggregation = (JsonObject) new JsonParser().parse(oremapObject.getJsonObject(JsonLDTerm.ore("describes").getLabel()).toString());

-            bagID = aggregation.get("@id").getAsString() + "v."
+            String pidUrlString = aggregation.get("@id").getAsString();
+            String pidString = GlobalId.parse(pidUrlString).get().asString();
+            bagID = pidString + "v."
+ aggregation.get(JsonLDTerm.schemaOrg("version").getLabel()).getAsString(); logger.info("Generating Bag: " + bagID); From be58313d8c30af98e8779687f9cff62499f7df36 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 3 Jun 2022 13:40:08 -0400 Subject: [PATCH 078/608] Fix parsing - convert url to local pid form --- .../edu/harvard/iq/dataverse/util/bagit/BagGenerator.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 2a3a34507b1..51e7ffd63de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -208,7 +208,13 @@ public boolean generateBag(OutputStream outputStream) throws Exception { aggregation = (JsonObject) new JsonParser().parse(oremapObject.getJsonObject(JsonLDTerm.ore("describes").getLabel()).toString()); String pidUrlString = aggregation.get("@id").getAsString(); - String pidString = GlobalId.parse(pidUrlString).get().asString(); + String pidString=pidUrlString; + //ToDo - put this conversion in GlobalId + if(pidUrlString.startsWith(GlobalId.DOI_RESOLVER_URL)) { + pidString = pidUrlString.replace(GlobalId.DOI_RESOLVER_URL, (GlobalId.DOI_PROTOCOL + ":")); + } else if(pidUrlString.startsWith(GlobalId.HDL_RESOLVER_URL)) { + pidString = pidUrlString.replace(GlobalId.HDL_RESOLVER_URL, (GlobalId.HDL_PROTOCOL + ":")); + } bagID = pidString + "v." + aggregation.get(JsonLDTerm.schemaOrg("version").getLabel()).getAsString(); From 7e82009436f8b1f17fdc0caed8a0f65c3f0da500 Mon Sep 17 00:00:00 2001 From: Robert Treacy Date: Wed, 8 Jun 2022 14:58:48 -0400 Subject: [PATCH 079/608] use signedUrl for getting authenticated user. 
add allowedUrls field to ExternalTool
---
 .../iq/dataverse/api/AbstractApiBean.java     | 19 +++++++++
 .../dataverse/externaltools/ExternalTool.java | 40 ++++++++++++++++++-
 .../externaltools/ExternalToolHandler.java    | 17 ++++++++
 3 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java
index d2c3f68dba2..24994497267 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java
@@ -49,6 +49,7 @@
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import edu.harvard.iq.dataverse.util.BundleUtil;
 import edu.harvard.iq.dataverse.util.SystemConfig;
+import edu.harvard.iq.dataverse.util.UrlSignerUtil;
 import edu.harvard.iq.dataverse.util.json.JsonParser;
 import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
 import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean;
@@ -419,10 +420,28 @@ private AuthenticatedUser findAuthenticatedUserOrDie( String key, String wfid )
             } else {
                 throw new WrappedResponse(badWFKey(wfid));
             }
+        } else {
+            AuthenticatedUser authUser = getAuthenticatedUserFromSignedUrl();
+            if (authUser != null) {
+                return authUser;
+            }
         }
         //Just send info about the apiKey - workflow users will learn about invocationId elsewhere
         throw new WrappedResponse(badApiKey(null));
     }
+
+    private AuthenticatedUser getAuthenticatedUserFromSignedUrl() {
+        AuthenticatedUser authUser = null;
+        String signedUrl = httpRequest.getRequestURL().toString();
+        String user = httpRequest.getParameter("user");
+        String method = httpRequest.getMethod();
+        String key = httpRequest.getParameter("token");
+        boolean validated = UrlSignerUtil.isValidUrl(signedUrl, method, user, key);
+        if (validated){
+            authUser = authSvc.getAuthenticatedUser(user);
+        }
+        return authUser;
+    }

     protected Dataverse findDataverseOrDie( String dvIdtf ) throws WrappedResponse {
         Dataverse dv = findDataverse(dvIdtf);
diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java
index c996e332bdb..b393ee7c747 100644
--- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java
+++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java
@@ -41,6 +41,7 @@ public class ExternalTool implements Serializable {
     public static final String TOOL_PARAMETERS = "toolParameters";
     public static final String CONTENT_TYPE = "contentType";
     public static final String TOOL_NAME = "toolName";
+    public static final String ALLOWED_URLS = "allowedUrls";

     @Id
     @GeneratedValue(strategy = GenerationType.IDENTITY)
@@ -97,6 +98,13 @@ public class ExternalTool implements Serializable {
     @Column(nullable = true, columnDefinition = "TEXT")
     private String contentType;

+    /**
+     * Path for retrieving data through the REST api.
Used to build signedUrls + * for POST headers, as in DPCreator + */ + @Column(nullable = true, columnDefinition = "TEXT") + private String allowedUrls; + /** * This default constructor is only here to prevent this error at * deployment: @@ -122,6 +130,18 @@ public ExternalTool(String displayName, String toolName, String description, Lis this.contentType = contentType; } + public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType, String allowedUrls) { + this.displayName = displayName; + this.toolName = toolName; + this.description = description; + this.externalToolTypes = externalToolTypes; + this.scope = scope; + this.toolUrl = toolUrl; + this.toolParameters = toolParameters; + this.contentType = contentType; + this.allowedUrls = allowedUrls; + } + public enum Type { EXPLORE("explore"), @@ -273,6 +293,9 @@ public JsonObjectBuilder toJson() { if (getContentType() != null) { jab.add(CONTENT_TYPE, getContentType()); } + if (getAllowedUrls()!= null) { + jab.add(ALLOWED_URLS,getAllowedUrls()); + } return jab; } @@ -292,7 +315,8 @@ public enum ReservedWord { DATASET_PID("datasetPid"), DATASET_VERSION("datasetVersion"), FILE_METADATA_ID("fileMetadataId"), - LOCALE_CODE("localeCode"); + LOCALE_CODE("localeCode"), + ALLOWED_URLS("allowedUrls"); private final String text; private final String START = "{"; @@ -355,5 +379,19 @@ public String getDisplayNameLang() { return displayName; } + /** + * @return the allowedUrls + */ + public String getAllowedUrls() { + return allowedUrls; + } + + /** + * @param allowedUrls the allowedUrls to set + */ + public void setAllowedUrls(String allowedUrls) { + this.allowedUrls = allowedUrls; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index baa386485d3..8061303b434 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -33,6 +33,20 @@ */ public class ExternalToolHandler { + /** + * @return the allowedUrls + */ + public String getAllowedUrls() { + return allowedUrls; + } + + /** + * @param allowedUrls the allowedUrls to set + */ + public void setAllowedUrls(String allowedUrls) { + this.allowedUrls = allowedUrls; + } + /** * @param user the user to set */ @@ -53,6 +67,7 @@ public void setUser(String user) { private String toolContext; private String user; private String siteUrl; + private String allowedUrls; /** * File level tool @@ -209,6 +224,8 @@ private String getQueryParam(String key, String value) { } case LOCALE_CODE: return key + "=" + getLocaleCode(); + case ALLOWED_URLS: + return key + "=" + getAllowedUrls(); default: break; } From a9d9b4fc7c11e6c4e56391b06ae28aa9179afbc6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 8 Jun 2022 15:15:00 -0400 Subject: [PATCH 080/608] fix display width in dataset md edit mode --- src/main/webapp/metadataFragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml index 0104615025c..324fd2e0b84 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -315,7 +315,7 @@ - +
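A note on the signed-URL authentication introduced in PATCH 079 above: findAuthenticatedUserOrDie() now falls back to getAuthenticatedUserFromSignedUrl(), so a tool that cannot hold a user's API key (DP Creator is the motivating case) can call back into the Dataverse API with a pre-signed link. Below is a minimal sketch of the two halves, using only the UrlSignerUtil signatures that appear in this patch series; the URL, user, and key values are illustrative assumptions, and the internal signature scheme is not spelled out in these patches:

    // Signing side (the signUrl(url, timeout, user, method, key) call appears
    // later in this series, in the /api/admin/requestSignedUrl endpoint):
    String key = "...";  // assumption: the secret the server signs with
    String signed = UrlSignerUtil.signUrl("https://demo.example.org/api/datasets/42", 10, "alice", "GET", key);
    // Validating side, as wired into getAuthenticatedUserFromSignedUrl():
    boolean ok = UrlSignerUtil.isValidUrl(signed, "GET", "alice", key);

At this point the key checked against the URL is simply the token query parameter supplied by the caller; PATCH 082 below replaces that with the target user's stored API token, looked up server-side, so the secret no longer travels with the request.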
From c6aa44951beb54a55070536d48fac55cab22e80b Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Wed, 8 Jun 2022 15:15:15 -0400
Subject: [PATCH 081/608] flyway for instructions column in template

---
 .../db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql

diff --git a/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql b/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql
new file mode 100644
index 00000000000..ee58d91333d
--- /dev/null
+++ b/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql
@@ -0,0 +1 @@
+ALTER TABLE template ADD COLUMN IF NOT EXISTS instructions TEXT;

From 7c9fa06aa274331aedec4d5ffa2889e37c55389e Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 9 Jun 2022 11:09:21 -0400
Subject: [PATCH 082/608] fix for validation method/comments

---
 .../iq/dataverse/api/AbstractApiBean.java | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java
index 24994497267..402908c57e3 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java
@@ -432,14 +432,22 @@ private AuthenticatedUser findAuthenticatedUserOrDie( String key, String wfid )

     private AuthenticatedUser getAuthenticatedUserFromSignedUrl() {
         AuthenticatedUser authUser = null;
-        String signedUrl = httpRequest.getRequestURL().toString();
+        // The signedUrl contains a param telling which user this is supposed to be for.
+        // We don't trust this. So we look up that user, and get their API key, and use
+        // that as a secret in validating the signedURL. If the signature can't be
+        // validated with their key, the user (or their API key) has been changed and
+        // we reject the request.
+        //ToDo - add null checks / verify that calling methods catch things.
String user = httpRequest.getParameter("user"); + AuthenticatedUser targetUser = authSvc.getAuthenticatedUser(user); + String key = authSvc.findApiTokenByUser(targetUser).getTokenString(); + String signedUrl = httpRequest.getRequestURL().toString(); String method = httpRequest.getMethod(); - String key = httpRequest.getParameter("token"); + boolean validated = UrlSignerUtil.isValidUrl(signedUrl, method, user, key); if (validated){ - authUser = authSvc.getAuthenticatedUser(user); - } + authUser = targetUser; + } return authUser; } From 39180ccbba124894e5d5f7a999e07ed11b36fb46 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 9 Jun 2022 11:09:40 -0400 Subject: [PATCH 083/608] JSON API call to request signedUrl --- .../edu/harvard/iq/dataverse/api/Admin.java | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 4085b504578..4ab542b469c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -31,6 +31,7 @@ import edu.harvard.iq.dataverse.authorization.providers.shib.ShibAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.shib.ShibServiceBean; import edu.harvard.iq.dataverse.authorization.providers.shib.ShibUtil; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailData; import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailException; @@ -44,6 +45,7 @@ import javax.json.Json; import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; +import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; import javax.ws.rs.GET; import javax.ws.rs.POST; @@ -94,6 +96,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; import java.io.IOException; import java.io.OutputStream; @@ -2061,4 +2064,43 @@ public Response getBannerMessages(@PathParam("id") Long id) throws WrappedRespon .collect(toJsonArray())); } + + @POST + @Consumes("application/json") + @Path("/requestSignedUrl") + public Response getSignedUrl(JsonObject urlInfo) throws WrappedResponse { + AuthenticatedUser superuser = authSvc.getAdminUser(); + + if (superuser == null) { + return error(Response.Status.FORBIDDEN, "Requesting signed URLs is restricted to superusers."); + } + + String userId = urlInfo.getString("user"); + String key=null; + if(userId!=null) { + AuthenticatedUser user = authSvc.getAuthenticatedUser(userId); + if(user!=null) { + ApiToken apiToken = authSvc.findApiTokenByUser(user); + if(apiToken!=null && !apiToken.isExpired() && ! apiToken.isDisabled()) { + key = apiToken.getTokenString(); + } + } else { + userId=superuser.getIdentifier(); + //We ~know this exists - the superuser just used it and it was unexpired/not disabled. 
(ToDo - if we want this to work with workflow tokens (or as a signed URL, we should do more checking as for the user above)) + } + key = authSvc.findApiTokenByUser(superuser).getTokenString(); + } + if(key==null) { + return error(Response.Status.CONFLICT, "Do not have a valid user with apiToken"); + } + + String baseUrl = urlInfo.getString("url"); + int timeout = urlInfo.getInt("timeout", 10); + String method = urlInfo.getString("method", "GET"); + + String signedUrl = UrlSignerUtil.signUrl(baseUrl, timeout, userId, method, key); + + return ok(Json.createObjectBuilder().add("signedUrl", signedUrl)); + } + } From 55fafa573fba5ab9ed89ab0c706be14357410aaa Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 9 Jun 2022 12:04:56 -0400 Subject: [PATCH 084/608] json read object/array from string methods from other branches --- .../harvard/iq/dataverse/util/json/JsonUtil.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index ae6935945e8..ef506990f69 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -3,6 +3,8 @@ import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonObject; + +import java.io.StringReader; import java.io.StringWriter; import java.util.HashMap; import java.util.Map; @@ -55,5 +57,16 @@ public static String prettyPrint(javax.json.JsonObject jsonObject) { } return stringWriter.toString(); } + + public static javax.json.JsonObject getJsonObject(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + return Json.createReader(rdr).readObject(); + } + } + public static JsonArray getJsonArray(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + return Json.createReader(rdr).readArray(); + } + } } From 125d45818257426e0cb5b10314982acf44b996cb Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 13 Jun 2022 21:36:04 -0400 Subject: [PATCH 085/608] update flyway --- .../V5.11.0.2__hdc-3b2-template-instructions.sql | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql b/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql index ee58d91333d..df1d3068159 100644 --- a/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql +++ b/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql @@ -1 +1,14 @@ ALTER TABLE template ADD COLUMN IF NOT EXISTS instructions TEXT; + +ALTER TABLE dataset ADD COLUMN IF NOT EXISTS template_id BIGINT; + +DO $$ +BEGIN + + BEGIN + ALTER TABLE dataset ADD CONSTRAINT fx_dataset_template_id FOREIGN KEY (template_id) REFERENCES template(id); + EXCEPTION + WHEN duplicate_object THEN RAISE NOTICE 'Table constraint fk_dataset_template_id already exists'; + END; + +END $$; From 6f57d11aa09423f0d9b82360d7cafe672caf643b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 13 Jun 2022 23:37:38 -0400 Subject: [PATCH 086/608] OK when metadata doesn't exist and won't send message --- .../workflow/internalspi/LDNAnnounceDatasetVersionStep.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java 
b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java index 3388e54e5bf..5b570fe2e96 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java @@ -81,6 +81,10 @@ public WorkflowStepResult run(WorkflowContext context) { } catch (URISyntaxException e) { return new Failure("LDNAnnounceDatasetVersion workflow step failed: unable to parse inbox in :LDNTarget setting."); } + if(announcement==null) { + logger.info(context.getDataset().getGlobalId().asString() + "does not have metadata required to send LDN message. Nothing sent."); + return OK; + } // execute try (CloseableHttpResponse response = client.execute(announcement)) { int code = response.getStatusLine().getStatusCode(); @@ -114,7 +118,7 @@ public void rollback(WorkflowContext context, Failure reason) { throw new UnsupportedOperationException("Not supported yet."); // This class does not need to resume. } - HttpPost buildAnnouncement(boolean b, WorkflowContext ctxt, JsonObject target) throws URISyntaxException { + HttpPost buildAnnouncement(boolean qb, WorkflowContext ctxt, JsonObject target) throws URISyntaxException { // First check that we have what is required DatasetVersion dv = ctxt.getDataset().getReleasedVersion(); From 881e3db2564eab58dc91b76162b9465cc1d5f2b1 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 14 Jun 2022 12:52:32 -0400 Subject: [PATCH 087/608] define/use an additional secret key --- .../java/edu/harvard/iq/dataverse/api/AbstractApiBean.java | 2 +- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 2 +- .../java/edu/harvard/iq/dataverse/util/SystemConfig.java | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 402908c57e3..4adac3feace 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -440,7 +440,7 @@ private AuthenticatedUser getAuthenticatedUserFromSignedUrl() { //ToDo - add null checks/ verify that calling methods catch things. String user = httpRequest.getParameter("user"); AuthenticatedUser targetUser = authSvc.getAuthenticatedUser(user); - String key = authSvc.findApiTokenByUser(targetUser).getTokenString(); + String key = System.getProperty(SystemConfig.API_SIGNING_SECRET,"") + authSvc.findApiTokenByUser(targetUser).getTokenString(); String signedUrl = httpRequest.getRequestURL().toString(); String method = httpRequest.getMethod(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 4ab542b469c..f0546aaca30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2088,7 +2088,7 @@ public Response getSignedUrl(JsonObject urlInfo) throws WrappedResponse { userId=superuser.getIdentifier(); //We ~know this exists - the superuser just used it and it was unexpired/not disabled. 
(ToDo - if we want this to work with workflow tokens (or as a signed URL, we should do more checking as for the user above))
             }
-            key = authSvc.findApiTokenByUser(superuser).getTokenString();
+            key = System.getProperty(SystemConfig.API_SIGNING_SECRET,"") + authSvc.findApiTokenByUser(superuser).getTokenString();
         }
         if(key==null) {
             return error(Response.Status.CONFLICT, "Do not have a valid user with apiToken");
         }

diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
index 6ea63e2b51f..3c7f05bec1e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
@@ -124,6 +124,11 @@ public class SystemConfig {
     public final static String DEFAULTCURATIONLABELSET = "DEFAULT";
     public final static String CURATIONLABELSDISABLED = "DISABLED";

+    // A secret used in signing URLs - individual urls are signed using this and the
+    // intended user's apiKey, creating an aggregate key that is unique to the user
+    // but not known to the user (as their apiKey is)
+    public final static String API_SIGNING_SECRET = "dataverse.api-signing-secret";
+
     public String getVersion() {
         return getVersion(false);
     }

From 208ab95a947d66f17559853bedd4f870f76504a4 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Tue, 21 Jun 2022 11:24:05 -0400
Subject: [PATCH 088/608] refactor to allow URL token substitution outside
 tools framework

---
 .../dataverse/externaltools/ExternalTool.java |  58 -----
 .../externaltools/ExternalToolHandler.java    | 116 +--------
 .../ExternalToolServiceBean.java              |   3 +-
 .../iq/dataverse/util/URLTokenUtil.java       | 231 ++++++++++++++++++
 .../iq/dataverse/util/UrlTokenUtilTest.java   |  50 ++++
 5 files changed, 289 insertions(+), 169 deletions(-)
 create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java
 create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java

diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java
index b393ee7c747..476181af852 100644
--- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java
+++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java
@@ -299,64 +299,6 @@ public JsonObjectBuilder toJson() {
         return jab;
     }

-    public enum ReservedWord {
-
-        // TODO: Research if a format like "{reservedWord}" is easily parse-able or if another format would be
-        // better. The choice of curly braces is somewhat arbitrary, but has been observed in documenation for
-        // various REST APIs. For example, "Variable substitutions will be made when a variable is named in {brackets}."
-        // from https://swagger.io/specification/#fixed-fields-29 but that's for URLs.
-        FILE_ID("fileId"),
-        FILE_PID("filePid"),
-        SITE_URL("siteUrl"),
-        API_TOKEN("apiToken"),
-        // datasetId is the database id
-        DATASET_ID("datasetId"),
-        // datasetPid is the DOI or Handle
-        DATASET_PID("datasetPid"),
-        DATASET_VERSION("datasetVersion"),
-        FILE_METADATA_ID("fileMetadataId"),
-        LOCALE_CODE("localeCode"),
-        ALLOWED_URLS("allowedUrls");
-
-        private final String text;
-        private final String START = "{";
-        private final String END = "}";
-
-        private ReservedWord(final String text) {
-            this.text = START + text + END;
-        }
-
-        /**
-         * This is a centralized method that enforces that only reserved words
-         * are allowed to be used by external tools.
External tool authors - * cannot pass their own query parameters through Dataverse such as - * "mode=mode1". - * - * @throws IllegalArgumentException - */ - public static ReservedWord fromString(String text) throws IllegalArgumentException { - if (text != null) { - for (ReservedWord reservedWord : ReservedWord.values()) { - if (text.equals(reservedWord.text)) { - return reservedWord; - } - } - } - // TODO: Consider switching to a more informative message that enumerates the valid reserved words. - boolean moreInformativeMessage = false; - if (moreInformativeMessage) { - throw new IllegalArgumentException("Unknown reserved word: " + text + ". A reserved word must be one of these values: " + Arrays.asList(ReservedWord.values()) + "."); - } else { - throw new IllegalArgumentException("Unknown reserved word: " + text); - } - } - - @Override - public String toString() { - return text; - } - } - public String getDescriptionLang() { String description = ""; if (this.toolName != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index 8061303b434..8a1e9661e3a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -3,10 +3,9 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import edu.harvard.iq.dataverse.externaltools.ExternalTool.ReservedWord; -import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import edu.harvard.iq.dataverse.util.UrlSignerUtil; import java.io.IOException; import java.io.StringReader; @@ -19,6 +18,7 @@ import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; + import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonObject; @@ -31,8 +31,7 @@ * instantiated. Applies logic based on an {@link ExternalTool} specification, * such as constructing a URL to access that file. */ -public class ExternalToolHandler { - +public class ExternalToolHandler extends URLTokenUtil { /** * @return the allowedUrls */ @@ -54,15 +53,8 @@ public void setUser(String user) { this.user = user; } - private static final Logger logger = Logger.getLogger(ExternalToolHandler.class.getCanonicalName()); - private final ExternalTool externalTool; - private final DataFile dataFile; - private final Dataset dataset; - private final FileMetadata fileMetadata; - private ApiToken apiToken; - private String localeCode; private String requestMethod; private String toolContext; private String user; @@ -78,23 +70,9 @@ public void setUser(String user) { * used anonymously. 
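      * (After this refactor the dataFile/fileMetadata null checks are performed
      * by the URLTokenUtil superclass constructor rather than in this class.)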
*/ public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToken apiToken, FileMetadata fileMetadata, String localeCode) { + super(dataFile, apiToken, fileMetadata, localeCode); this.externalTool = externalTool; toolContext = externalTool.getToolUrl(); - if (dataFile == null) { - String error = "A DataFile is required."; - logger.warning("Error in ExternalToolHandler constructor: " + error); - throw new IllegalArgumentException(error); - } - if (fileMetadata == null) { - String error = "A FileMetadata is required."; - logger.warning("Error in ExternalToolHandler constructor: " + error); - throw new IllegalArgumentException(error); - } - this.dataFile = dataFile; - this.apiToken = apiToken; - this.fileMetadata = fileMetadata; - dataset = fileMetadata.getDatasetVersion().getDataset(); - this.localeCode = localeCode; } /** @@ -106,33 +84,8 @@ public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToke * used anonymously. */ public ExternalToolHandler(ExternalTool externalTool, Dataset dataset, ApiToken apiToken, String localeCode) { + super(dataset, apiToken, localeCode); this.externalTool = externalTool; - if (dataset == null) { - String error = "A Dataset is required."; - logger.warning("Error in ExternalToolHandler constructor: " + error); - throw new IllegalArgumentException(error); - } - this.dataset = dataset; - this.apiToken = apiToken; - this.dataFile = null; - this.fileMetadata = null; - this.localeCode = localeCode; - } - - public DataFile getDataFile() { - return dataFile; - } - - public FileMetadata getFileMetadata() { - return fileMetadata; - } - - public ApiToken getApiToken() { - return apiToken; - } - - public String getLocaleCode() { - return localeCode; } // TODO: rename to handleRequest() to someday handle sending headers as well as query parameters. @@ -175,63 +128,6 @@ public String handleRequest(boolean preview) { } } - private String getQueryParam(String key, String value) { - ReservedWord reservedWord = ReservedWord.fromString(value); - switch (reservedWord) { - case FILE_ID: - // getDataFile is never null for file tools because of the constructor - return key + "=" + getDataFile().getId(); - case FILE_PID: - GlobalId filePid = getDataFile().getGlobalId(); - if (filePid != null) { - return key + "=" + getDataFile().getGlobalId(); - } - break; - case SITE_URL: - siteUrl = SystemConfig.getDataverseSiteUrlStatic(); - return key + "=" + siteUrl; - case API_TOKEN: - String apiTokenString = null; - ApiToken theApiToken = getApiToken(); - if (theApiToken != null) { - apiTokenString = theApiToken.getTokenString(); - return key + "=" + apiTokenString; - } - break; - case DATASET_ID: - return key + "=" + dataset.getId(); - case DATASET_PID: - return key + "=" + dataset.getGlobalId().asString(); - case DATASET_VERSION: - String versionString = null; - if(fileMetadata!=null) { //true for file case - versionString = fileMetadata.getDatasetVersion().getFriendlyVersionNumber(); - } else { //Dataset case - return the latest visible version (unless/until the dataset case allows specifying a version) - if (getApiToken() != null) { - versionString = dataset.getLatestVersion().getFriendlyVersionNumber(); - } else { - versionString = dataset.getLatestVersionForCopy().getFriendlyVersionNumber(); - } - } - if (("DRAFT").equals(versionString)) { - versionString = ":draft"; // send the token needed in api calls that can be substituted for a numeric - // version. 
- } - return key + "=" + versionString; - case FILE_METADATA_ID: - if(fileMetadata!=null) { //true for file case - return key + "=" + fileMetadata.getId(); - } - case LOCALE_CODE: - return key + "=" + getLocaleCode(); - case ALLOWED_URLS: - return key + "=" + getAllowedUrls(); - default: - break; - } - return null; - } - private String postFormData(Integer timeout,List params ) throws IOException, InterruptedException{ String url = ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java index 95fd900e4d2..d49d66c26f7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java @@ -3,8 +3,9 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import edu.harvard.iq.dataverse.externaltools.ExternalTool.ReservedWord; import edu.harvard.iq.dataverse.externaltools.ExternalTool.Type; +import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.URLTokenUtil.ReservedWord; import edu.harvard.iq.dataverse.externaltools.ExternalTool.Scope; import java.io.StringReader; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java new file mode 100644 index 00000000000..78280cd0f0f --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -0,0 +1,231 @@ +package edu.harvard.iq.dataverse.util; + +import java.util.Arrays; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; + +public class URLTokenUtil { + + protected static final Logger logger = Logger.getLogger(URLTokenUtil.class.getCanonicalName()); + protected final DataFile dataFile; + protected final Dataset dataset; + protected final FileMetadata fileMetadata; + protected ApiToken apiToken; + protected String localeCode; + + /** + * File level + * + * @param dataFile Required. + * @param apiToken The apiToken can be null + * @param fileMetadata Required. + * @param localeCode optional. + * + */ + public URLTokenUtil(DataFile dataFile, ApiToken apiToken, FileMetadata fileMetadata, String localeCode) + throws IllegalArgumentException { + if (dataFile == null) { + String error = "A DataFile is required."; + logger.warning("Error in URLTokenUtil constructor: " + error); + throw new IllegalArgumentException(error); + } + if (fileMetadata == null) { + String error = "A FileMetadata is required."; + logger.warning("Error in URLTokenUtil constructor: " + error); + throw new IllegalArgumentException(error); + } + this.dataFile = dataFile; + this.dataset = fileMetadata.getDatasetVersion().getDataset(); + this.fileMetadata = fileMetadata; + this.apiToken = apiToken; + this.localeCode = localeCode; + } + + /** + * Dataset level + * + * @param dataset Required. 
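+     * (File-specific reserved words such as {fileId} or {filePid} cannot be
+     * resolved through this constructor; see the replaceTokensWithValues javadoc.)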
+ * @param apiToken The apiToken can be null + */ + public URLTokenUtil(Dataset dataset, ApiToken apiToken, String localeCode) { + if (dataset == null) { + String error = "A Dataset is required."; + logger.warning("Error in URLTokenUtil constructor: " + error); + throw new IllegalArgumentException(error); + } + this.dataset = dataset; + this.dataFile = null; + this.fileMetadata = null; + this.apiToken = apiToken; + this.localeCode = localeCode; + } + + public DataFile getDataFile() { + return dataFile; + } + + public FileMetadata getFileMetadata() { + return fileMetadata; + } + + public ApiToken getApiToken() { + return apiToken; + } + + public String getLocaleCode() { + return localeCode; + } + + public String getQueryParam(String key, String value) { + String tokenValue = null; + tokenValue = getTokenValue(value); + if (tokenValue != null) { + return key + '=' + tokenValue; + } else { + return null; + } + } + + /** + * Tries to replace all occurrences of {} with the value for the + * corresponding ReservedWord + * + * @param url - the input string in which to replace tokens, normally a url + * @throws IllegalArgumentException if there is no matching ReservedWord or if + * the configuation of this instance doesn't + * have values for this ReservedWord (e.g. + * asking for FILE_PID when using the dataset + * constructor, etc.) + */ + public String replaceTokensWithValues(String url) { + String newUrl = url; + Pattern pattern = Pattern.compile("(\\{.*?\\})"); + Matcher matcher = pattern.matcher(url); + while(matcher.find()) { + String token = matcher.group(1); + ReservedWord reservedWord = ReservedWord.fromString(token); + String tValue = getTokenValue(token); + logger.info("Replacing " + reservedWord.toString() + " with " + tValue + " in " + newUrl); + newUrl = newUrl.replace(reservedWord.toString(), tValue); + } + return newUrl; + } + + private String getTokenValue(String value) { + ReservedWord reservedWord = ReservedWord.fromString(value); + switch (reservedWord) { + case FILE_ID: + // getDataFile is never null for file tools because of the constructor + return getDataFile().getId().toString(); + case FILE_PID: + GlobalId filePid = getDataFile().getGlobalId(); + if (filePid != null) { + return getDataFile().getGlobalId().asString(); + } + break; + case SITE_URL: + return SystemConfig.getDataverseSiteUrlStatic(); + case API_TOKEN: + String apiTokenString = null; + ApiToken theApiToken = getApiToken(); + if (theApiToken != null) { + apiTokenString = theApiToken.getTokenString(); + } + return apiTokenString; + case DATASET_ID: + return dataset.getId().toString(); + case DATASET_PID: + return dataset.getGlobalId().asString(); + case DATASET_VERSION: + String versionString = null; + if (fileMetadata != null) { // true for file case + versionString = fileMetadata.getDatasetVersion().getFriendlyVersionNumber(); + } else { // Dataset case - return the latest visible version (unless/until the dataset + // case allows specifying a version) + if (getApiToken() != null) { + versionString = dataset.getLatestVersion().getFriendlyVersionNumber(); + } else { + versionString = dataset.getLatestVersionForCopy().getFriendlyVersionNumber(); + } + } + if (("DRAFT").equals(versionString)) { + versionString = ":draft"; // send the token needed in api calls that can be substituted for a numeric + // version. 
+ } + return versionString; + case FILE_METADATA_ID: + if (fileMetadata != null) { // true for file case + return fileMetadata.getId().toString(); + } + case LOCALE_CODE: + return getLocaleCode(); + default: + break; + } + throw new IllegalArgumentException("Cannot replace reserved word: " + value); + } + + public enum ReservedWord { + + // TODO: Research if a format like "{reservedWord}" is easily parse-able or if + // another format would be + // better. The choice of curly braces is somewhat arbitrary, but has been + // observed in documentation for + // various REST APIs. For example, "Variable substitutions will be made when a + // variable is named in {brackets}." + // from https://swagger.io/specification/#fixed-fields-29 but that's for URLs. + FILE_ID("fileId"), FILE_PID("filePid"), SITE_URL("siteUrl"), API_TOKEN("apiToken"), + // datasetId is the database id + DATASET_ID("datasetId"), + // datasetPid is the DOI or Handle + DATASET_PID("datasetPid"), DATASET_VERSION("datasetVersion"), FILE_METADATA_ID("fileMetadataId"), + LOCALE_CODE("localeCode"); + + private final String text; + private final String START = "{"; + private final String END = "}"; + + private ReservedWord(final String text) { + this.text = START + text + END; + } + + /** + * This is a centralized method that enforces that only reserved words are + * allowed to be used by external tools. External tool authors cannot pass their + * own query parameters through Dataverse such as "mode=mode1". + * + * @throws IllegalArgumentException + */ + public static ReservedWord fromString(String text) throws IllegalArgumentException { + if (text != null) { + for (ReservedWord reservedWord : ReservedWord.values()) { + if (text.equals(reservedWord.text)) { + return reservedWord; + } + } + } + // TODO: Consider switching to a more informative message that enumerates the + // valid reserved words. + boolean moreInformativeMessage = false; + if (moreInformativeMessage) { + throw new IllegalArgumentException( + "Unknown reserved word: " + text + ". 
A reserved word must be one of these values: " + + Arrays.asList(ReservedWord.values()) + "."); + } else { + throw new IllegalArgumentException("Unknown reserved word: " + text); + } + } + + @Override + public String toString() { + return text; + } + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java new file mode 100644 index 00000000000..ffc6b813045 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java @@ -0,0 +1,50 @@ +package edu.harvard.iq.dataverse.util; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.when; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +public class UrlTokenUtilTest { + + @Test + public void testGetToolUrlWithOptionalQueryParameters() { + + DataFile dataFile = new DataFile(); + dataFile.setId(42l); + FileMetadata fmd = new FileMetadata(); + DatasetVersion dv = new DatasetVersion(); + Dataset ds = new Dataset(); + ds.setId(50L); + ds.setGlobalId(new GlobalId("doi:10.5072/FK2ABCDEF")); + dv.setDataset(ds); + fmd.setDatasetVersion(dv); + List fmdl = new ArrayList(); + fmdl.add(fmd); + dataFile.setFileMetadatas(fmdl); + ApiToken apiToken = new ApiToken(); + apiToken.setTokenString("7196b5ce-f200-4286-8809-03ffdbc255d7"); + URLTokenUtil urlTokenUtil = new URLTokenUtil(dataFile, apiToken, fmd, "en"); + assertEquals("en", urlTokenUtil.replaceTokensWithValues("{localeCode}")); + assertEquals("42 test en", urlTokenUtil.replaceTokensWithValues("{fileId} test {localeCode}")); + assertEquals("42 test en", urlTokenUtil.replaceTokensWithValues("{fileId} test {localeCode}")); + + assertEquals("https://librascholar.org/api/files/42/metadata?key=" + apiToken.getTokenString(), urlTokenUtil.replaceTokensWithValues("{siteUrl}/api/files/{fileId}/metadata?key={apiToken}")); + + URLTokenUtil urlTokenUtil2 = new URLTokenUtil(ds, apiToken, "en"); + assertEquals("https://librascholar.org/api/datasets/50?key=" + apiToken.getTokenString(), urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/{datasetId}?key={apiToken}")); + assertEquals("https://librascholar.org/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2ABCDEF&key=" + apiToken.getTokenString(), urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/:persistentId/?persistentId={datasetPid}&key={apiToken}")); + } +} From 49286e361964161cc8a02d092037fd3919d6c81d Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 23 Jun 2022 12:18:56 +0200 Subject: [PATCH 089/608] license sorting: renamed sql script --- ...-sorting_licenses.sql => V5.11.0.1__8671-sorting_licenses.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.10.1.3__8671-sorting_licenses.sql => V5.11.0.1__8671-sorting_licenses.sql} (100%) diff --git a/src/main/resources/db/migration/V5.10.1.3__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.11.0.1__8671-sorting_licenses.sql similarity index 100% rename from 
src/main/resources/db/migration/V5.10.1.3__8671-sorting_licenses.sql rename to src/main/resources/db/migration/V5.11.0.1__8671-sorting_licenses.sql From 3d7683ff094303c177fab0e610f8c31086eebd85 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Jun 2022 14:56:42 -0400 Subject: [PATCH 090/608] update package_id --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 7 ++++++- .../engine/command/impl/S3SubmitToArchiveCommand.java | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 80969c80c3e..7ded0f785ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -97,7 +97,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Dataverse ancestor = dataset.getOwner(); String alias = getArchivableAncestor(ancestor, collections); String spaceName = getSpaceName(dataset); - String packageId = spaceName + ".v" + dv.getFriendlyVersionNumber(); + String packageId = getFileName(spaceName, dv); if (alias != null) { if (drsConfigObject.getBoolean("single_version", false)) { @@ -288,6 +288,11 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t return WorkflowStepResult.OK; } + @Override + protected String getFileName(String spaceName, DatasetVersion dv) { + return spaceName + (".v" + dv.getFriendlyVersionNumber()).replace('.', '_'); + } + public static String createJWTString(Algorithm algorithmRSA, String installationBrandName, String body, int expirationInMinutes) throws IOException { String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); logger.fine("Canonical body: " + canonicalBody); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 391a2f7c94a..af7dc86bf4b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -94,7 +94,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } // Store BagIt file - String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber(); + String fileName = getFileName(spaceName, dv); + String bagKey = spaceName + "/" + fileName + ".zip"; // Add BagIt ZIP file // Google uses MD5 as one way to verify the @@ -157,6 +158,10 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } } + protected String getFileName(String spaceName, DatasetVersion dv) { + return spaceName + ".v" + dv.getFriendlyVersionNumber(); + } + protected String getSpaceName(Dataset dataset) { if (spaceName == null) { spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-').replace('.', '-') From 18d3f5d999157ddb492993cabf977d7aa596e49a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Jun 2022 16:15:42 -0400 Subject: [PATCH 091/608] allow datacite filename override --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 6 ++++++ .../engine/command/impl/S3SubmitToArchiveCommand.java | 7 +++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 7ded0f785ed..f185359d32e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -292,6 +292,12 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t protected String getFileName(String spaceName, DatasetVersion dv) { return spaceName + (".v" + dv.getFriendlyVersionNumber()).replace('.', '_'); } + + @Override + protected String getDataCiteFileName(String spaceName, DatasetVersion dv) { + return spaceName + ("_datacite.v" + dv.getFriendlyVersionNumber()).replace('.','_'); + } + public static String createJWTString(Algorithm algorithmRSA, String installationBrandName, String body, int expirationInMinutes) throws IOException { String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index af7dc86bf4b..a18f99f4bee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -84,8 +84,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Add datacite.xml file ObjectMetadata om = new ObjectMetadata(); om.setContentLength(dataciteIn.available()); - String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber() - + ".xml"; + String dcKey = spaceName + "/" + getDataCiteFileName(spaceName, dv) + ".xml"; tm.upload(new PutObjectRequest(bucketName, dcKey, dataciteIn, om)).waitForCompletion(); om = s3.getObjectMetadata(bucketName, dcKey); if (om == null) { @@ -158,6 +157,10 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } } + protected String getDataCiteFileName(String spaceName, DatasetVersion dv) { + return spaceName + "_datacite.v" + dv.getFriendlyVersionNumber(); + } + protected String getFileName(String spaceName, DatasetVersion dv) { return spaceName + ".v" + dv.getFriendlyVersionNumber(); } From 9d4d815e3decc2b652845b0fb63e5d8a739537db Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Jun 2022 17:02:05 -0400 Subject: [PATCH 092/608] fix missing field check --- .../workflow/internalspi/LDNAnnounceDatasetVersionStep.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java index 5b570fe2e96..3478d9398f0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java @@ -130,7 +130,7 @@ HttpPost buildAnnouncement(boolean qb, WorkflowContext ctxt, JsonObject target) } Set reqFields = fields.keySet(); for (DatasetField df : dvf) { - if (reqFields.contains(df.getDatasetFieldType().getName())) { + if(!df.isEmpty() && reqFields.contains(df.getDatasetFieldType().getName())) { fields.put(df.getDatasetFieldType().getName(), df); } } From 
9f3fadbcbf1bf3d6c246a0e693015462eb90de89 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Jul 2022 16:38:02 -0400 Subject: [PATCH 093/608] missing property --- src/main/java/propertyFiles/Bundle.properties | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index c43f2c1ede9..4f5b1201e35 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -266,6 +266,7 @@ notification.typeDescription.WORKFLOW_SUCCESS=External workflow run has succeede notification.typeDescription.WORKFLOW_FAILURE=External workflow run has failed notification.typeDescription.STATUSUPDATED=Status of dataset has been updated notification.typeDescription.DATASETCREATED=Dataset was created by user +notification.typeDescription.DATASETMENTIONED=Dataset was referenced in remote system groupAndRoles.manageTips=Here is where you can access and manage all the groups you belong to, and the roles you have been assigned. user.message.signup.label=Create Account From 0c22b1839f6977f4ae0253905aa740cdac27976c Mon Sep 17 00:00:00 2001 From: Robert Treacy Date: Fri, 15 Jul 2022 17:45:05 -0400 Subject: [PATCH 094/608] sending a list of allowed api calls to DPCreator --- .../dataverse/externaltools/ExternalTool.java | 24 +++---- .../externaltools/ExternalToolHandler.java | 66 ++++++++++++++----- .../ExternalToolServiceBean.java | 6 +- .../iq/dataverse/util/URLTokenUtil.java | 6 +- 4 files changed, 70 insertions(+), 32 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java index 476181af852..79c0e3dd8f1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java @@ -41,7 +41,7 @@ public class ExternalTool implements Serializable { public static final String TOOL_PARAMETERS = "toolParameters"; public static final String CONTENT_TYPE = "contentType"; public static final String TOOL_NAME = "toolName"; - public static final String ALLOWED_URLS = "allowedUrls"; + public static final String ALLOWED_API_CALLS = "allowedApiCalls"; @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @@ -103,7 +103,7 @@ public class ExternalTool implements Serializable { * for POST headers, as in DPCreator */ @Column(nullable = true, columnDefinition = "TEXT") - private String allowedUrls; + private String allowedApiCalls; /** * This default constructor is only here to prevent this error at @@ -130,7 +130,7 @@ public ExternalTool(String displayName, String toolName, String description, Lis this.contentType = contentType; } - public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType, String allowedUrls) { + public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType, String allowedApiCalls) { this.displayName = displayName; this.toolName = toolName; this.description = description; @@ -139,7 +139,7 @@ public ExternalTool(String displayName, String toolName, String description, Lis this.toolUrl = toolUrl; this.toolParameters = toolParameters; this.contentType = contentType; - this.allowedUrls = allowedUrls; + this.allowedApiCalls = allowedApiCalls; } public enum Type { @@ -293,8 
+293,8 @@ public JsonObjectBuilder toJson() { if (getContentType() != null) { jab.add(CONTENT_TYPE, getContentType()); } - if (getAllowedUrls()!= null) { - jab.add(ALLOWED_URLS,getAllowedUrls()); + if (getAllowedApiCalls()!= null) { + jab.add(ALLOWED_API_CALLS,getAllowedApiCalls()); } return jab; } @@ -322,17 +322,17 @@ public String getDisplayNameLang() { } /** - * @return the allowedUrls + * @return the allowedApiCalls */ - public String getAllowedUrls() { - return allowedUrls; + public String getAllowedApiCalls() { + return allowedApiCalls; } /** - * @param allowedUrls the allowedUrls to set + * @param allowedApiCalls the allowedApiCalls to set */ - public void setAllowedUrls(String allowedUrls) { - this.allowedUrls = allowedUrls; + public void setAllowedApiCalls(String allowedApiCalls) { + this.allowedApiCalls = allowedApiCalls; } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index 8a1e9661e3a..83440608350 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -9,6 +9,7 @@ import edu.harvard.iq.dataverse.util.UrlSignerUtil; import java.io.IOException; import java.io.StringReader; +import java.io.StringWriter; import java.net.HttpURLConnection; import java.net.URI; import java.net.http.HttpClient; @@ -21,9 +22,11 @@ import javax.json.Json; import javax.json.JsonArray; +import javax.json.JsonArrayBuilder; import javax.json.JsonObject; import javax.json.JsonReader; import javax.json.JsonString; +import javax.json.JsonWriter; import javax.ws.rs.HttpMethod; /** @@ -33,17 +36,17 @@ */ public class ExternalToolHandler extends URLTokenUtil { /** - * @return the allowedUrls + * @return the allowedApiCalls */ - public String getAllowedUrls() { - return allowedUrls; + public String getAllowedApiCalls() { + return allowedApiCalls; } /** - * @param allowedUrls the allowedUrls to set + * @param allowedApiCalls the allowedApiCalls to set */ - public void setAllowedUrls(String allowedUrls) { - this.allowedUrls = allowedUrls; + public void setAllowedApiCalls(String allowedApiCalls) { + this.allowedApiCalls = allowedApiCalls; } /** @@ -59,7 +62,7 @@ public void setUser(String user) { private String toolContext; private String user; private String siteUrl; - private String allowedUrls; + private String allowedApiCalls; /** * File level tool @@ -113,14 +116,44 @@ public String handleRequest(boolean preview) { params.add(param); } }); - }); - if (requestMethod.equals(HttpMethod.POST)){ + }); + + StringWriter allowedApiCallsStringWriter = new StringWriter(); + String allowedApis; + try (JsonWriter jsonWriter = Json.createWriter(allowedApiCallsStringWriter)) { + JsonArrayBuilder jsonArrayBuilder = Json.createArrayBuilder(); + allowedApiCalls = externalTool.getAllowedApiCalls(); + JsonReader jsonReaderApis = Json.createReader(new StringReader(allowedApiCalls)); + JsonObject objApis = jsonReaderApis.readObject(); + JsonArray apis = objApis.getJsonArray("apis"); + apis.getValuesAs(JsonObject.class).forEach(((apiObj) -> { + String name = apiObj.getJsonString("name").toString(); + String httpmethod = apiObj.getJsonString("method").toString(); + int timeout = apiObj.getInt("timeOut"); + String apiPath = replaceTokensWithValues(apiObj.getJsonString("urlTemplate").toString()); + String url = UrlSignerUtil.signUrl(apiPath, timeout, user,httpmethod, 
getApiToken().getTokenString()); + jsonArrayBuilder.add( + Json.createObjectBuilder().add("name", name) + .add("httpMethod", httpmethod) + .add("signedUrl", url) + .add("timeOut", timeout)); + })); + JsonArray allowedApiCallsArray = jsonArrayBuilder.build(); + jsonWriter.writeArray(allowedApiCallsArray); + allowedApis = allowedApiCallsStringWriter.toString(); try { - return postFormData(obj.getJsonNumber("timeOut").intValue(), params); - } catch (IOException | InterruptedException ex) { + allowedApiCallsStringWriter.close(); + } catch (IOException ex) { Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); } } + if (requestMethod.equals(HttpMethod.POST)){ + try { + return postFormData(allowedApis); + } catch (IOException | InterruptedException ex) { + Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); + } + } if (!preview) { return "?" + String.join("&", params); } else { @@ -129,14 +162,11 @@ public String handleRequest(boolean preview) { } - private String postFormData(Integer timeout,List params ) throws IOException, InterruptedException{ - String url = ""; -// Integer timeout = obj.getJsonNumber("timeOut").intValue(); - url = UrlSignerUtil.signUrl(siteUrl, timeout, user, HttpMethod.POST, getApiToken().getTokenString()); + private String postFormData(String allowedApis ) throws IOException, InterruptedException{ + String url = null; HttpClient client = HttpClient.newHttpClient(); - HttpRequest request = HttpRequest.newBuilder().POST(HttpRequest.BodyPublishers.ofString(String.join("&", params))).uri(URI.create(externalTool.getToolUrl())) - .header("Content-Type", "application/x-www-form-urlencoded") - .header("signedUrl", url) + HttpRequest request = HttpRequest.newBuilder().POST(HttpRequest.BodyPublishers.ofString(allowedApis)).uri(URI.create(externalTool.getToolUrl())) + .header("Content-Type", "application/json") .build(); HttpResponse response = client.send(request, HttpResponse.BodyHandlers.ofString()); boolean redirect=false; diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java index d49d66c26f7..ad7cee14e4a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java @@ -169,6 +169,8 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { String toolUrl = getRequiredTopLevelField(jsonObject, TOOL_URL); JsonObject toolParametersObj = jsonObject.getJsonObject(TOOL_PARAMETERS); JsonArray queryParams = toolParametersObj.getJsonArray("queryParameters"); + JsonObject allowedApiCallsObj = jsonObject.getJsonObject(ALLOWED_API_CALLS); + JsonArray apis = allowedApiCallsObj.getJsonArray("apis"); boolean allRequiredReservedWordsFound = false; if (scope.equals(Scope.FILE)) { List requiredReservedWordCandidates = new ArrayList<>(); @@ -221,8 +223,10 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { } String toolParameters = toolParametersObj.toString(); + String allowedApiCalls = allowedApiCallsObj.toString(); - return new ExternalTool(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType); +// return new ExternalTool(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType); + return new ExternalTool(displayName, toolName, description, externalToolTypes, 
scope, toolUrl, toolParameters, contentType, allowedApiCalls); } private static String getRequiredTopLevelField(JsonObject jsonObject, String key) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java index 78280cd0f0f..1a1e92a2802 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -166,6 +166,8 @@ private String getTokenValue(String value) { } case LOCALE_CODE: return getLocaleCode(); + case ALLOWED_API_CALLS: + default: break; } @@ -186,7 +188,9 @@ public enum ReservedWord { DATASET_ID("datasetId"), // datasetPid is the DOI or Handle DATASET_PID("datasetPid"), DATASET_VERSION("datasetVersion"), FILE_METADATA_ID("fileMetadataId"), - LOCALE_CODE("localeCode"); + LOCALE_CODE("localeCode"), + ALLOWED_API_CALLS ("allowedApiCalls"); + private final String text; private final String START = "{"; From 1b31e6c2c8b3eae21eb85818bf025dcd11d17f24 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 20 Jul 2022 17:24:37 -0400 Subject: [PATCH 095/608] tweak json read/write, getString, cleanup, logging --- .../dataverse/externaltools/ExternalTool.java | 8 +- .../externaltools/ExternalToolHandler.java | 80 +++++++------------ .../ExternalToolServiceBean.java | 4 +- 3 files changed, 36 insertions(+), 56 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java index 79c0e3dd8f1..bda9ebad063 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java @@ -20,7 +20,6 @@ import javax.persistence.Id; import javax.persistence.JoinColumn; import javax.persistence.OneToMany; -import javax.persistence.Transient; /** * A specification or definition for how an external tool is intended to @@ -30,8 +29,6 @@ @Entity public class ExternalTool implements Serializable { - private static final Logger logger = Logger.getLogger(ExternalToolServiceBean.class.getCanonicalName()); - public static final String DISPLAY_NAME = "displayName"; public static final String DESCRIPTION = "description"; public static final String LEGACY_SINGLE_TYPE = "type"; @@ -99,8 +96,9 @@ public class ExternalTool implements Serializable { private String contentType; /** - * Path for retrieving data through the REST api. Used to build signedUrls - * for POST headers, as in DPCreator + * Set of API calls the tool would like to be able to use (e,.g. for retrieving + * data through the Dataverse REST api). Used to build signedUrls for POST + * headers, as in DPCreator */ @Column(nullable = true, columnDefinition = "TEXT") private String allowedApiCalls; diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index 83440608350..54489953606 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -7,6 +7,8 @@ import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; @@ -35,19 +37,6 @@ * such as constructing a URL to access that file. 
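 * For example (an illustrative sketch only: the entry name, path, and timeout
 * below are hypothetical, while the keys mirror the parsing code in this
 * class):
 *   {"apis": [{"name": "retrieveDatasetJson", "method": "GET",
 *              "urlTemplate": "/api/datasets/{datasetId}", "timeOut": 10}]}
 * Each urlTemplate has its reserved words replaced with values, the resulting
 * URL is signed, and the signed entries are handed to the tool.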
*/ public class ExternalToolHandler extends URLTokenUtil { - /** - * @return the allowedApiCalls - */ - public String getAllowedApiCalls() { - return allowedApiCalls; - } - - /** - * @param allowedApiCalls the allowedApiCalls to set - */ - public void setAllowedApiCalls(String allowedApiCalls) { - this.allowedApiCalls = allowedApiCalls; - } /** * @param user the user to set @@ -61,8 +50,7 @@ public void setUser(String user) { private String requestMethod; private String toolContext; private String user; - private String siteUrl; - private String allowedApiCalls; + /** * File level tool @@ -98,9 +86,7 @@ public String handleRequest() { // TODO: rename to handleRequest() to someday handle sending headers as well as query parameters. public String handleRequest(boolean preview) { - String toolParameters = externalTool.getToolParameters(); - JsonReader jsonReader = Json.createReader(new StringReader(toolParameters)); - JsonObject obj = jsonReader.readObject(); + JsonObject obj = JsonUtil.getJsonObject(externalTool.getToolParameters()); JsonString method = obj.getJsonString("httpMethod"); requestMethod = method!=null?method.getString():HttpMethod.GET; JsonArray queryParams = obj.getJsonArray("queryParameters"); @@ -118,36 +104,32 @@ public String handleRequest(boolean preview) { }); }); - StringWriter allowedApiCallsStringWriter = new StringWriter(); String allowedApis; - try (JsonWriter jsonWriter = Json.createWriter(allowedApiCallsStringWriter)) { - JsonArrayBuilder jsonArrayBuilder = Json.createArrayBuilder(); - allowedApiCalls = externalTool.getAllowedApiCalls(); - JsonReader jsonReaderApis = Json.createReader(new StringReader(allowedApiCalls)); - JsonObject objApis = jsonReaderApis.readObject(); - JsonArray apis = objApis.getJsonArray("apis"); - apis.getValuesAs(JsonObject.class).forEach(((apiObj) -> { - String name = apiObj.getJsonString("name").toString(); - String httpmethod = apiObj.getJsonString("method").toString(); - int timeout = apiObj.getInt("timeOut"); - String apiPath = replaceTokensWithValues(apiObj.getJsonString("urlTemplate").toString()); - String url = UrlSignerUtil.signUrl(apiPath, timeout, user,httpmethod, getApiToken().getTokenString()); - jsonArrayBuilder.add( - Json.createObjectBuilder().add("name", name) - .add("httpMethod", httpmethod) - .add("signedUrl", url) - .add("timeOut", timeout)); - })); - JsonArray allowedApiCallsArray = jsonArrayBuilder.build(); - jsonWriter.writeArray(allowedApiCallsArray); - allowedApis = allowedApiCallsStringWriter.toString(); - try { - allowedApiCallsStringWriter.close(); - } catch (IOException ex) { - Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); - } - } - if (requestMethod.equals(HttpMethod.POST)){ + JsonArrayBuilder jsonArrayBuilder = Json.createArrayBuilder(); + + JsonObject objApis = JsonUtil.getJsonObject(externalTool.getAllowedApiCalls()); + + JsonArray apis = objApis.getJsonArray("apis"); + apis.getValuesAs(JsonObject.class).forEach(((apiObj) -> { + String name = apiObj.getJsonString("name").getString(); + String httpmethod = apiObj.getJsonString("method").getString(); + int timeout = apiObj.getInt("timeOut"); + String urlTemplate = apiObj.getJsonString("urlTemplate").getString(); + logger.fine("URL Template: " + urlTemplate); + String apiPath = replaceTokensWithValues(urlTemplate); + logger.fine("URL WithTokens: " + apiPath); + String url = UrlSignerUtil.signUrl(apiPath, timeout, user, httpmethod, getApiToken().getTokenString()); + logger.fine("Signed URL: " + url); + 
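// Illustrative sketch of one resulting entry (values are hypothetical; the
// until/user/method/token query parameters are those appended by UrlSignerUtil):
// {"name": "retrieveDatasetJson", "httpMethod": "GET",
//  "signedUrl": "https://demo.example.org/api/datasets/42?until=2022-08-01T12:00&user=someUser&method=GET&token=<sha512hex>",
//  "timeOut": 10}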
jsonArrayBuilder.add(Json.createObjectBuilder().add("name", name).add("httpMethod", httpmethod) + .add("signedUrl", url).add("timeOut", timeout)); + })); + JsonArray allowedApiCallsArray = jsonArrayBuilder.build(); + allowedApis = JsonUtil.prettyPrint(allowedApiCallsArray); + logger.fine("Sending these signed URLS: " + allowedApis); + + //ToDo - if the allowedApiCalls() are defined, could/should we send them to tools using GET as well? + + if (requestMethod.equals(HttpMethod.POST)) { try { return postFormData(allowedApis); } catch (IOException | InterruptedException ex) { @@ -167,7 +149,7 @@ private String postFormData(String allowedApis ) throws IOException, Interrupted HttpClient client = HttpClient.newHttpClient(); HttpRequest request = HttpRequest.newBuilder().POST(HttpRequest.BodyPublishers.ofString(allowedApis)).uri(URI.create(externalTool.getToolUrl())) .header("Content-Type", "application/json") - .build(); + .build(); HttpResponse response = client.send(request, HttpResponse.BodyHandlers.ofString()); boolean redirect=false; int status = response.statusCode(); @@ -178,7 +160,7 @@ private String postFormData(String allowedApis ) throws IOException, Interrupted redirect = true; } } - if (redirect=true){ + if (redirect==true){ String newUrl = response.headers().firstValue("location").get(); toolContext = "http://" + response.uri().getAuthority(); diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java index ad7cee14e4a..432aa26714d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.externaltools.ExternalTool.Type; import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.URLTokenUtil.ReservedWord; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.externaltools.ExternalTool.Scope; import java.io.StringReader; @@ -151,8 +152,7 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { if (manifest == null || manifest.isEmpty()) { throw new IllegalArgumentException("External tool manifest was null or empty!"); } - JsonReader jsonReader = Json.createReader(new StringReader(manifest)); - JsonObject jsonObject = jsonReader.readObject(); + JsonObject jsonObject = JsonUtil.getJsonObject(manifest); //Note: ExternalToolServiceBeanTest tests are dependent on the order of these retrievals String displayName = getRequiredTopLevelField(jsonObject, DISPLAY_NAME); String toolName = getOptionalTopLevelField(jsonObject, TOOL_NAME); From 98f21eada0a8dbbffae6cf133dc038b0e55e1b74 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 17 May 2021 15:20:34 +0200 Subject: [PATCH 096/608] feat(metadata): add metadata block for CodeMeta #7844 --- scripts/api/data/metadatablocks/codemeta.tsv | 40 ++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 scripts/api/data/metadatablocks/codemeta.tsv diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv new file mode 100644 index 00000000000..9f32cf20299 --- /dev/null +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -0,0 +1,40 @@ +#metadataBlock name dataverseAlias displayName blockURI + codeMeta20 Software Metadata (v2.0) https://codemeta.github.io/terms/ +#datasetField name title description watermark fieldType 
displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI + softwareVersion Software version Version of the software instance. major.minor, e.g. 1.3 text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, inactive, suspended. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 + codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, etc.). https://... url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository + programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage + operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). 4 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/operatingSystem + operatingSystemName Name The supported operating systems name Windows, Mac OS X, Linux, Android, ... text 5 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 + operatingSystemVersion Version The supported operating systems version text 6 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 + applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. text #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory + applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory + softwareHelp Software Help/Documentation Link to help texts or documentation https://... url #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp + buildInstructions Build instructions Link to installation instructions/documentation https://... url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 + runtimePlatform Runtime Platform Runtime platform or script interpreter dependencies (Example - Java v1, Python2.3, .Net Framework 3.0). Supersedes runtime. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/runtimePlatform + targetProduct Target Product Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. text #VALUE FALSE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/targetProduct + softwareRequirements Software Requirements Required software dependencies none FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/softwareRequirements + softwareRequirementsName Name Name or title of the required software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 + softwareRequirementsVersion Version Version of the required software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 + softwareRequirementsUrl URL Link to required software/library https://... 
url #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 + softwareSuggestions Software Suggestions Optional dependencies, e.g. for optional features, code development, etc. none FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 + softwareSuggestionsName Name Name or title of the optional software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 + softwareSuggestionsVersion Version Version of the optional software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 + softwareSuggestionsUrl URL Link to optional software/library https://... url #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 + permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions + memoryRequirements Memory Requirements Minimum memory requirements. text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements + processorRequirements Processor Requirements Processor architecture required to run the application (e.g. IA64). text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements + storageRequirements Storage Requirements Storage requirements (e.g. free space required). text #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements + releaseNotes Release Notes Link to release notes https://... url #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/releaseNotes + contIntegration Continous integration Link to continuous integration service https://... url #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 + issueTracker Issue Tracker Link to software bug reporting or issue tracking system https://... url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 +#controlledVocabulary DatasetField Value identifier displayOrder + developmentStatus Concept concept 0 + developmentStatus WIP wip 1 + developmentStatus Active active 2 + developmentStatus Inactive inactive 3 + developmentStatus Unsupported unsupported 4 + developmentStatus Moved moved 5 + developmentStatus Suspended suspended 6 + developmentStatus Abandoned abandoned 7 \ No newline at end of file From f9f9cbda095f0c72ce54ad020933d005c1d9d1ee Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 17 May 2021 15:34:46 +0200 Subject: [PATCH 097/608] docs(metadata): add CodeMeta reference to user guide --- doc/sphinx-guides/source/user/appendix.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index 003f02cdd61..e0fa83ad2ca 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -26,5 +26,6 @@ Detailed below are what metadata schemas we support for Citation and Domain Spec `Virtual Observatory (VO) Discovery and Provenance Metadata `__ (`see .tsv version `__). - `Life Sciences Metadata `__: based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__ (`see .tsv version `__). - `Journal Metadata `__: based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__ (`see .tsv version `__). 
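Once a TSV like the one above is loaded, the presence of the new block can be confirmed through the native API (an illustrative check; the codeMeta20 identifier comes from the TSV's #metadataBlock row)::

    curl http://localhost:8080/api/metadatablocks/codeMeta20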
+- `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) See also the `Dataverse Software 4.0 Metadata Crosswalk: DDI, DataCite, DC, DCTerms, VO, ISA-Tab `__ document and the :doc:`/admin/metadatacustomization` section of the Admin Guide. From ed485df14d3761fb3fca4e0ea0bfa2d20ed2f332 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 17 May 2021 15:35:25 +0200 Subject: [PATCH 098/608] feat(metadata): load CodeMeta by default in new installations. --- scripts/api/setup-datasetfields.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/api/setup-datasetfields.sh b/scripts/api/setup-datasetfields.sh index 0d2d60b9538..741a439e542 100755 --- a/scripts/api/setup-datasetfields.sh +++ b/scripts/api/setup-datasetfields.sh @@ -7,3 +7,4 @@ curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @da curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" +curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/codemeta.tsv -H "Content-type: text/tab-separated-values" From 3c497a1475e864103c0583bea861e5e9376e23d1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 18 May 2021 13:08:58 +0200 Subject: [PATCH 099/608] fix(metadata): fix wrong tab in CodeMeta and rephrase softwareVersion watermark helptext #7844 --- scripts/api/data/metadatablocks/codemeta.tsv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index 9f32cf20299..c2711bca3ed 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -1,14 +1,14 @@ #metadataBlock name dataverseAlias displayName blockURI codeMeta20 Software Metadata (v2.0) https://codemeta.github.io/terms/ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI - softwareVersion Software version Version of the software instance. major.minor, e.g. 1.3 text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + softwareVersion Software version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, inactive, suspended. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, etc.). https://... url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. 
Python, C++, Matlab, Fortran, Java, Julia,...) text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). 4 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/operatingSystem operatingSystemName Name The supported operating systems name Windows, Mac OS X, Linux, Android, ... text 5 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 operatingSystemVersion Version The supported operating systems version text 6 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 - applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. text #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory + applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. "" text #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory softwareHelp Software Help/Documentation Link to help texts or documentation https://... url #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp buildInstructions Build instructions Link to installation instructions/documentation https://... url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 From 492491e89ef13f6254511b172641e1669b485e17 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 18 May 2021 13:11:52 +0200 Subject: [PATCH 100/608] fix(metadata): add standard name to Codemeta MDB displayName. #7844 --- scripts/api/data/metadatablocks/codemeta.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index c2711bca3ed..020cdcc11fc 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -1,5 +1,5 @@ #metadataBlock name dataverseAlias displayName blockURI - codeMeta20 Software Metadata (v2.0) https://codemeta.github.io/terms/ + codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI softwareVersion Software version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, inactive, suspended. See repostatus.org for more information. 
text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 From 382c1e4035752b3917cd9c967e55a8d41601a20d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 21 Jul 2022 13:49:58 +0200 Subject: [PATCH 101/608] fix(metadata): make CodeMeta TSV usable #7844 - Add missing displayOrder values - Fix missing type for software requirements - Avoid splitting up compound fields too much, otherwise data is not exportable to schema.org or CodeMeta JSON-LD without special handling (#7856) - Tweak order - Tweak descriptions and examples - Fix whitespaces and line endings --- scripts/api/data/metadatablocks/codemeta.tsv | 77 ++++++++++---------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index 020cdcc11fc..029ca2355ec 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -1,40 +1,37 @@ -#metadataBlock name dataverseAlias displayName blockURI - codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI - softwareVersion Software version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion - developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, inactive, suspended. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 - codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, etc.). https://... url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository - programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage - operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). 4 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/operatingSystem - operatingSystemName Name The supported operating systems name Windows, Mac OS X, Linux, Android, ... text 5 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 - operatingSystemVersion Version The supported operating systems version text 6 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 - applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. "" text #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory - applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory - softwareHelp Software Help/Documentation Link to help texts or documentation https://... url #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp - buildInstructions Build instructions Link to installation instructions/documentation https://... 
url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 - runtimePlatform Runtime Platform Runtime platform or script interpreter dependencies (Example - Java v1, Python2.3, .Net Framework 3.0). Supersedes runtime. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/runtimePlatform - targetProduct Target Product Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. text #VALUE FALSE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/targetProduct - softwareRequirements Software Requirements Required software dependencies none FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/softwareRequirements - softwareRequirementsName Name Name or title of the required software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 - softwareRequirementsVersion Version Version of the required software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 - softwareRequirementsUrl URL Link to required software/library https://... url #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 - softwareSuggestions Software Suggestions Optional dependencies, e.g. for optional features, code development, etc. none FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 - softwareSuggestionsName Name Name or title of the optional software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 - softwareSuggestionsVersion Version Version of the optional software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 - softwareSuggestionsUrl URL Link to optional software/library https://... url #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 - permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions - memoryRequirements Memory Requirements Minimum memory requirements. text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements - processorRequirements Processor Requirements Processor architecture required to run the application (e.g. IA64). text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements - storageRequirements Storage Requirements Storage requirements (e.g. free space required). text #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements - releaseNotes Release Notes Link to release notes https://... url #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/releaseNotes - contIntegration Continous integration Link to continuous integration service https://... url #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 - issueTracker Issue Tracker Link to software bug reporting or issue tracking system https://... 
url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 -#controlledVocabulary DatasetField Value identifier displayOrder - developmentStatus Concept concept 0 - developmentStatus WIP wip 1 - developmentStatus Active active 2 - developmentStatus Inactive inactive 3 - developmentStatus Unsupported unsupported 4 - developmentStatus Moved moved 5 - developmentStatus Suspended suspended 6 - developmentStatus Abandoned abandoned 7 \ No newline at end of file +#metadataBlock name dataverseAlias displayName blockURI + codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI + softwareVersion Software Version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 https://www.repostatus.org + codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). e.g. https://github.com/user/project url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository + applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory + applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text 4 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory + programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 5 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage + runtimePlatform Runtime Platform Runtime platform or script interpreter dependencies (e.g. Java 11, Python 3.10 or .Net Framework 4.8). e.g. Python 3.10 text 6 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/runtimePlatform + operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). text 7 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/operatingSystem + targetProduct Target Product Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. text 8 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/targetProduct + buildInstructions Build Instructions Link to installation instructions/documentation e.g. https://github.com/user/project/blob/main/BUILD.md url 9 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/buildInstructions + softwareRequirementsItem Software Requirements Required software dependencies none 10 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 + softwareRequirements Name & Version Name and version of the required software/library dependency e.g. 
Pandas 1.4.3 text 0 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE softwareRequirementsItem codeMeta20 https://schema.org/softwareRequirements + softwareRequirementsInfoUrl Info URL Link to required software/library homepage or documentation (ideally also versioned) e.g. https://pandas.pydata.org/pandas-docs/version/1.4.3 url 1 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE softwareRequirementsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareRequirementsInfoUrl + softwareSuggestionsItem Software Suggestions Optional dependencies, e.g. for optional features, code development, etc. none 11 FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 + softwareSuggestions Name & Version Name and version of the optional software/library dependency e.g. Sphinx 5.0.2 text 0 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE softwareSuggestionsItem codeMeta20 https://codemeta.github.io/terms/softwareSuggestions + softwareSuggestionsInfoUrl Info URL Link to optional software/library homepage or documentation (ideally also versioned) e.g. https://www.sphinx-doc.org url 1 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE softwareSuggestionsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareSuggestionsInfoUrl + memoryRequirements Memory Requirements Minimum memory requirements. text 12 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements + processorRequirements Processor Requirements Processor architecture required to run the application (e.g. IA64). text 13 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements + storageRequirements Storage Requirements Storage requirements (e.g. free space required). text 14 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements + permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text 15 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions + softwareHelp Software Help/Documentation Link to help texts or documentation e.g. https://user.github.io/project/docs url 16 #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp + readme Readme Link to the README of the project e.g. https://github.com/user/project/blob/main/README.md url 17 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/readme + releaseNotes Release Notes Link to release notes e.g. https://github.com/user/project/blob/main/docs/release-0.1.md url 18 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/releaseNotes + contIntegration Continuous Integration Link to continuous integration service e.g. https://github.com/user/project/actions url 19 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/contIntegration + issueTracker Issue Tracker Link to software bug reporting or issue tracking system e.g. 
https://github.com/user/project/issues url 20 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/issueTracker +#controlledVocabulary DatasetField Value identifier displayOrder + developmentStatus Concept concept 0 + developmentStatus WIP wip 1 + developmentStatus Active active 2 + developmentStatus Inactive inactive 3 + developmentStatus Unsupported unsupported 4 + developmentStatus Moved moved 5 + developmentStatus Suspended suspended 6 + developmentStatus Abandoned abandoned 7 From 1e8567d2ad343547d39c3df3d32e4a1d81229d6e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 22 Jul 2022 10:01:45 +0200 Subject: [PATCH 102/608] feat(metadata): add i18n properties for CodeMeta #7844 --- .../java/propertyFiles/codeMeta20.properties | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 src/main/java/propertyFiles/codeMeta20.properties diff --git a/src/main/java/propertyFiles/codeMeta20.properties b/src/main/java/propertyFiles/codeMeta20.properties new file mode 100644 index 00000000000..e203c1e46e9 --- /dev/null +++ b/src/main/java/propertyFiles/codeMeta20.properties @@ -0,0 +1,85 @@ +metadatablock.name=codeMeta20 +metadatablock.displayName=Software Metadata (CodeMeta 2.0) +datasetfieldtype.softwareVersion.title=Software Version +datasetfieldtype.softwareVersion.description=Version of the software instance, usually following some convention like SemVer etc. +datasetfieldtype.softwareVersion.watermark=e.g. 0.2.1 or 1.3 or 2021.1 etc +datasetfieldtype.developmentStatus.title=Development Status +datasetfieldtype.developmentStatus.description=Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. +datasetfieldtype.developmentStatus.watermark= Development Status +datasetfieldtype.codeRepository.title=Code Repository +datasetfieldtype.codeRepository.description=Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). +datasetfieldtype.codeRepository.watermark=e.g. https://github.com/user/project +datasetfieldtype.applicationCategory.title= Application Category +datasetfieldtype.applicationCategory.description=Type of software application, e.g. Simulation, Analysis, Visualisation. +datasetfieldtype.applicationCategory.watermark= +datasetfieldtype.applicationSubCategory.title=Application Subcategory +datasetfieldtype.applicationSubCategory.description=Subcategory of the application, e.g. Arcade Game. +datasetfieldtype.applicationSubCategory.watermark= +datasetfieldtype.programmingLanguage.title=Programming Language +datasetfieldtype.programmingLanguage.description=The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) +datasetfieldtype.programmingLanguage.watermark= +datasetfieldtype.runtimePlatform.title=Runtime Platform +datasetfieldtype.runtimePlatform.description=Runtime platform or script interpreter dependencies (e.g. Java 11, Python 3.10 or .Net Framework 4.8). +datasetfieldtype.runtimePlatform.watermark=e.g. Python 3.10 +datasetfieldtype.operatingSystem.title=Operating Systems +datasetfieldtype.operatingSystem.description=Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). +datasetfieldtype.operatingSystem.watermark= +datasetfieldtype.targetProduct.title=Target Product +datasetfieldtype.targetProduct.description=Target Operating System / Product to which the code applies. 
If applies to several versions, just the product name can be used. +datasetfieldtype.targetProduct.watermark= +datasetfieldtype.buildInstructions.title=Build Instructions +datasetfieldtype.buildInstructions.description=Link to installation instructions/documentation +datasetfieldtype.buildInstructions.watermark=e.g. https://github.com/user/project/blob/main/BUILD.md +datasetfieldtype.softwareRequirementsItem.title=Software Requirements +datasetfieldtype.softwareRequirementsItem.description=Required software dependencies +datasetfieldtype.softwareRequirementsItem.watermark= +datasetfieldtype.softwareRequirements.title=Name & Version +datasetfieldtype.softwareRequirements.description=Name and version of the required software/library dependency +datasetfieldtype.softwareRequirements.watermark=e.g. Pandas 1.4.3 +datasetfieldtype.softwareRequirementsInfoUrl.title=Info URL +datasetfieldtype.softwareRequirementsInfoUrl.description=Link to required software/library homepage or documentation (ideally also versioned) +datasetfieldtype.softwareRequirementsInfoUrl.watermark=e.g. https://pandas.pydata.org/pandas-docs/version/1.4.3 +datasetfieldtype.softwareSuggestionsItem.title=Software Suggestions +datasetfieldtype.softwareSuggestionsItem.description=Optional dependencies, e.g. for optional features, code development, etc. +datasetfieldtype.softwareSuggestionsItem.watermark= +datasetfieldtype.softwareSuggestions.title=Name & Version +datasetfieldtype.softwareSuggestions.description=Name and version of the optional software/library dependency +datasetfieldtype.softwareSuggestions.watermark=e.g. Sphinx 5.0.2 +datasetfieldtype.softwareSuggestionsInfoUrl.title=Info URL +datasetfieldtype.softwareSuggestionsInfoUrl.description=Link to optional software/library homepage or documentation (ideally also versioned) +datasetfieldtype.softwareSuggestionsInfoUrl.watermark=e.g. https://www.sphinx-doc.org +datasetfieldtype.memoryRequirements.title=Memory Requirements +datasetfieldtype.memoryRequirements.description=Minimum memory requirements. +datasetfieldtype.memoryRequirements.watermark= +datasetfieldtype.processorRequirements.title=Processor Requirements +datasetfieldtype.processorRequirements.description=Processor architecture required to run the application (e.g. IA64). +datasetfieldtype.processorRequirements.watermark= +datasetfieldtype.storageRequirements.title=Storage Requirements +datasetfieldtype.storageRequirements.description=Storage requirements (e.g. free space required). +datasetfieldtype.storageRequirements.watermark= +datasetfieldtype.permissions.title=Permissions +datasetfieldtype.permissions.description=Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). +datasetfieldtype.permissions.watermark= +datasetfieldtype.softwareHelp.title=Software Help/Documentation +datasetfieldtype.softwareHelp.description=Link to help texts or documentation +datasetfieldtype.softwareHelp.watermark=e.g. https://user.github.io/project/docs +datasetfieldtype.readme.title=Readme +datasetfieldtype.readme.description=Link to the README of the project +datasetfieldtype.readme.watermark=e.g. https://github.com/user/project/blob/main/README.md +datasetfieldtype.releaseNotes.title=Release Notes +datasetfieldtype.releaseNotes.description=Link to release notes +datasetfieldtype.releaseNotes.watermark=e.g. 
https://github.com/user/project/blob/main/docs/release-0.1.md +datasetfieldtype.contIntegration.title=Continuous Integration +datasetfieldtype.contIntegration.description=Link to continuous integration service +datasetfieldtype.contIntegration.watermark=e.g. https://github.com/user/project/actions +datasetfieldtype.issueTracker.title=Issue Tracker +datasetfieldtype.issueTracker.description=Link to software bug reporting or issue tracking system +datasetfieldtype.issueTracker.watermark=e.g. https://github.com/user/project/issues +controlledvocabulary.developmentStatus.concept=Concept +controlledvocabulary.developmentStatus.wip=WIP +controlledvocabulary.developmentStatus.active=Active +controlledvocabulary.developmentStatus.inactive=Inactive +controlledvocabulary.developmentStatus.unsupported=Unsupported +controlledvocabulary.developmentStatus.moved=Moved +controlledvocabulary.developmentStatus.suspended=Suspended +controlledvocabulary.developmentStatus.abandoned=Abandoned From c6c669c0ebbb8a3ef161ef48b39391a0ee7064a9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 1 Aug 2022 12:38:14 +0200 Subject: [PATCH 103/608] refactor(metadata): move CodeMeta to experimental #7844 With the merge of computational workflow metadata considered experimental, move CodeMeta there, too. --- doc/sphinx-guides/source/user/appendix.rst | 2 +- scripts/api/setup-datasetfields.sh | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index fe8dc580f1e..d6009edc9c9 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -30,13 +30,13 @@ Detailed below are what metadata schemas we support for Citation and Domain Spec `Virtual Observatory (VO) Discovery and Provenance Metadata `__ (`see .tsv version `__). - `Life Sciences Metadata `__ (`see .tsv version `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. - `Journal Metadata `__ (`see .tsv version `__): based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__. -- `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) Experimental Metadata ~~~~~~~~~~~~~~~~~~~~~ Unlike supported metadata, experimental metadata is not enabled by default in a new Dataverse installation. Feedback via any `channel `_ is welcome! +- `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) - `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. 
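With codemeta.tsv no longer loaded by default (see the setup-datasetfields.sh change below), an installation that wants the experimental block loads it manually, following the same pattern the script uses for the other blocks::

    curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/codemeta.tsv -H "Content-type: text/tab-separated-values"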
See Also diff --git a/scripts/api/setup-datasetfields.sh b/scripts/api/setup-datasetfields.sh index 741a439e542..0d2d60b9538 100755 --- a/scripts/api/setup-datasetfields.sh +++ b/scripts/api/setup-datasetfields.sh @@ -7,4 +7,3 @@ curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @da curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/codemeta.tsv -H "Content-type: text/tab-separated-values" From d4189f37aad75f41f6a4ef3908aed761dec81061 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 4 Aug 2022 18:20:37 -0400 Subject: [PATCH 104/608] add signer tests, flip param order so sign/validate match, fix val bug --- .../iq/dataverse/api/AbstractApiBean.java | 2 +- .../iq/dataverse/util/UrlSignerUtil.java | 253 +++++++++--------- .../iq/dataverse/util/UrlSignerUtilTest.java | 50 ++++ 3 files changed, 178 insertions(+), 127 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 4adac3feace..7ddde7064fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -444,7 +444,7 @@ private AuthenticatedUser getAuthenticatedUserFromSignedUrl() { String signedUrl = httpRequest.getRequestURL().toString(); String method = httpRequest.getMethod(); - boolean validated = UrlSignerUtil.isValidUrl(signedUrl, method, user, key); + boolean validated = UrlSignerUtil.isValidUrl(signedUrl, user, method, key); if (validated){ authUser = targetUser; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index 1da1797a8ae..b11334520e6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -17,134 +17,135 @@ */ public class UrlSignerUtil { - private static final Logger logger = Logger.getLogger(UrlSignerUtil.class.getName()); + private static final Logger logger = Logger.getLogger(UrlSignerUtil.class.getName()); - /** - * - * @param baseUrl - the URL to sign - cannot contain query params - * "until","user", "method", or "token" - * @param timeout - how many minutes to make the URL valid for (note - time skew - * between the creator and receiver could affect the validation - * @param user - a string representing the user - should be understood by the - * creator/receiver - * @param method - one of the HTTP methods - * @param key - a secret key shared by the creator/receiver. 
In Dataverse - * this could be an APIKey (when sending URL to a tool that will - * use it to retrieve info from Dataverse) - * @return - the signed URL - */ - public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { - StringBuilder signedUrl = new StringBuilder(baseUrl); + /** + * + * @param baseUrl - the URL to sign - cannot contain query params + * "until","user", "method", or "token" + * @param timeout - how many minutes to make the URL valid for (note - time skew + * between the creator and receiver could affect the validation + * @param user - a string representing the user - should be understood by the + * creator/receiver + * @param method - one of the HTTP methods + * @param key - a secret key shared by the creator/receiver. In Dataverse + * this could be an APIKey (when sending URL to a tool that will + * use it to retrieve info from Dataverse) + * @return - the signed URL + */ + public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { + StringBuilder signedUrl = new StringBuilder(baseUrl); - boolean firstParam = true; - if (baseUrl.contains("?")) { - firstParam = false; - } - if (timeout != null) { - LocalDateTime validTime = LocalDateTime.now(); - validTime = validTime.plusMinutes(timeout); - validTime.toString(); - signedUrl.append(firstParam ? "?" : "&").append("until=").append(validTime); - firstParam=false; - } - if (user != null) { - signedUrl.append(firstParam ? "?" : "&").append("user=").append(user); - firstParam=false; - } - if (method != null) { - signedUrl.append(firstParam ? "?" : "&").append("method=").append(method); - } - signedUrl.append("&token="); - logger.fine("String to sign: " + signedUrl.toString() + ""); - signedUrl.append(DigestUtils.sha512Hex(signedUrl.toString() + key)); - logger.fine("Generated Signed URL: " + signedUrl.toString()); - if (logger.isLoggable(Level.FINE)) { - logger.fine( - "URL signature is " + (isValidUrl(signedUrl.toString(), method, user, key) ? "valid" : "invalid")); - } - return signedUrl.toString(); - } + boolean firstParam = true; + if (baseUrl.contains("?")) { + firstParam = false; + } + if (timeout != null) { + LocalDateTime validTime = LocalDateTime.now(); + validTime = validTime.plusMinutes(timeout); + validTime.toString(); + signedUrl.append(firstParam ? "?" : "&").append("until=").append(validTime); + firstParam = false; + } + if (user != null) { + signedUrl.append(firstParam ? "?" : "&").append("user=").append(user); + firstParam = false; + } + if (method != null) { + signedUrl.append(firstParam ? "?" : "&").append("method=").append(method); + firstParam=false; + } + signedUrl.append(firstParam ? "?" : "&").append("token="); + logger.fine("String to sign: " + signedUrl.toString() + ""); + signedUrl.append(DigestUtils.sha512Hex(signedUrl.toString() + key)); + logger.fine("Generated Signed URL: " + signedUrl.toString()); + if (logger.isLoggable(Level.FINE)) { + logger.fine( + "URL signature is " + (isValidUrl(signedUrl.toString(), user, method, key) ? "valid" : "invalid")); + } + return signedUrl.toString(); + } - /** - * This method will only return true if the URL and parameters except the - * "token" are unchanged from the original/match the values sent to this method, - * and the "token" parameter matches what this method recalculates using the - * shared key THe method also assures that the "until" timestamp is after the - * current time. 
- * - * @param signedUrl - the signed URL as received from Dataverse - * @param method - an HTTP method. If provided, the method in the URL must - * match - * @param user - a string representing the user, if provided the value must - * match the one in the url - * @param key - the shared secret key to be used in validation - * @return - true if valid, false if not: e.g. the key is not the same as the - * one used to generate the "token" any part of the URL preceding the - * "token" has been altered the method doesn't match (e.g. the server - * has received a POST request and the URL only allows GET) the user - * string doesn't match (e.g. the server knows user A is logged in, but - * the URL is only for user B) the url has expired (was used after the - * until timestamp) - */ - public static boolean isValidUrl(String signedUrl, String method, String user, String key) { - boolean valid = true; - try { - URL url = new URL(signedUrl); - List params = URLEncodedUtils.parse(url.getQuery(), Charset.forName("UTF-8")); - String hash = null; - String dateString = null; - String allowedMethod = null; - String allowedUser = null; - for (NameValuePair nvp : params) { - if (nvp.getName().equals("token")) { - hash = nvp.getValue(); - logger.fine("Hash: " + hash); - } - if (nvp.getName().equals("until")) { - dateString = nvp.getValue(); - logger.fine("Until: " + dateString); - } - if (nvp.getName().equals("method")) { - allowedMethod = nvp.getValue(); - logger.fine("Method: " + allowedMethod); - } - if (nvp.getName().equals("user")) { - allowedUser = nvp.getValue(); - logger.fine("User: " + allowedUser); - } - } + /** + * This method will only return true if the URL and parameters except the + * "token" are unchanged from the original/match the values sent to this method, + * and the "token" parameter matches what this method recalculates using the + * shared key THe method also assures that the "until" timestamp is after the + * current time. + * + * @param signedUrl - the signed URL as received from Dataverse + * @param method - an HTTP method. If provided, the method in the URL must + * match + * @param user - a string representing the user, if provided the value must + * match the one in the url + * @param key - the shared secret key to be used in validation + * @return - true if valid, false if not: e.g. the key is not the same as the + * one used to generate the "token" any part of the URL preceding the + * "token" has been altered the method doesn't match (e.g. the server + * has received a POST request and the URL only allows GET) the user + * string doesn't match (e.g. 
the server knows user A is logged in, but + * the URL is only for user B) the url has expired (was used after the + * until timestamp) + */ + public static boolean isValidUrl(String signedUrl, String user, String method, String key) { + boolean valid = true; + try { + URL url = new URL(signedUrl); + List params = URLEncodedUtils.parse(url.getQuery(), Charset.forName("UTF-8")); + String hash = null; + String dateString = null; + String allowedMethod = null; + String allowedUser = null; + for (NameValuePair nvp : params) { + if (nvp.getName().equals("token")) { + hash = nvp.getValue(); + logger.fine("Hash: " + hash); + } + if (nvp.getName().equals("until")) { + dateString = nvp.getValue(); + logger.fine("Until: " + dateString); + } + if (nvp.getName().equals("method")) { + allowedMethod = nvp.getValue(); + logger.fine("Method: " + allowedMethod); + } + if (nvp.getName().equals("user")) { + allowedUser = nvp.getValue(); + logger.fine("User: " + allowedUser); + } + } - int index = signedUrl.indexOf("&token="); - // Assuming the token is last - doesn't have to be, but no reason for the URL - // params to be rearranged either, and this should only cause false negatives if - // it does happen - String urlToHash = signedUrl.substring(0, index + 7); - logger.fine("String to hash: " + urlToHash + ""); - String newHash = DigestUtils.sha512Hex(urlToHash + key); - logger.fine("Calculated Hash: " + newHash); - if (!hash.equals(newHash)) { - logger.fine("Hash doesn't match"); - valid = false; - } - if (dateString != null && LocalDateTime.parse(dateString).isBefore(LocalDateTime.now())) { - logger.fine("Url is expired"); - valid = false; - } - if (method != null && !method.equals(allowedMethod)) { - logger.fine("Method doesn't match"); - valid = false; - } - if (user != null && !user.equals(allowedUser)) { - logger.fine("User doesn't match"); - valid = false; - } - } catch (Throwable t) { - // Want to catch anything like null pointers, etc. to force valid=false upon any - // error - logger.warning("Bad URL: " + signedUrl + " : " + t.getMessage()); - valid = false; - } - return valid; - } + int index = signedUrl.indexOf(((dateString==null && allowedMethod==null && allowedUser==null) ? "?":"&") + "token="); + // Assuming the token is last - doesn't have to be, but no reason for the URL + // params to be rearranged either, and this should only cause false negatives if + // it does happen + String urlToHash = signedUrl.substring(0, index + 7); + logger.fine("String to hash: " + urlToHash + ""); + String newHash = DigestUtils.sha512Hex(urlToHash + key); + logger.fine("Calculated Hash: " + newHash); + if (!hash.equals(newHash)) { + logger.fine("Hash doesn't match"); + valid = false; + } + if (dateString != null && LocalDateTime.parse(dateString).isBefore(LocalDateTime.now())) { + logger.fine("Url is expired"); + valid = false; + } + if (method != null && !method.equals(allowedMethod)) { + logger.fine("Method doesn't match"); + valid = false; + } + if (user != null && !user.equals(allowedUser)) { + logger.fine("User doesn't match"); + valid = false; + } + } catch (Throwable t) { + // Want to catch anything like null pointers, etc. 
to force valid=false upon any + // error + logger.warning("Bad URL: " + signedUrl + " : " + t.getMessage()); + valid = false; + } + return valid; + } -} \ No newline at end of file +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java new file mode 100644 index 00000000000..2b9d507758f --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java @@ -0,0 +1,50 @@ +package edu.harvard.iq.dataverse.util; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.junit.Test; + +public class UrlSignerUtilTest { + + @Test + public void testSignAndValidate() { + + final String url = "http://localhost:8080/api/test1"; + final String get = "GET"; + final String post = "POST"; + + final String user1 = "Alice"; + final String user2 = "Bob"; + final int tooQuickTimeout = -1; + final int longTimeout = 1000; + final String key = "abracadabara open sesame"; + final String badkey = "abracadabara open says me"; + + Logger.getLogger(UrlSignerUtil.class.getName()).setLevel(Level.FINE); + + String signedUrl1 = UrlSignerUtil.signUrl(url, longTimeout, user1, get, key); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl1, user1, get, key)); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl1, user1, null, key)); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl1, null, get, key)); + + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1, null, get, badkey)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1, user2, get, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1, user1, post, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1.replace(user1, user2), user1, get, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1.replace(user1, user2), user2, get, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1.replace(user1, user2), null, get, key)); + + String signedUrl2 = UrlSignerUtil.signUrl(url, null, null, null, key); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl2, null, null, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl2, null, post, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl2, user1, null, key)); + + String signedUrl3 = UrlSignerUtil.signUrl(url, tooQuickTimeout, user1, get, key); + + assertFalse(UrlSignerUtil.isValidUrl(signedUrl3, user1, get, key)); + } +}
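For readers tracing the tests above: the token that signUrl() emits is just a SHA-512 hex digest of the URL-so-far (including the trailing "token=") concatenated with the shared key, so a signed URL can be reproduced outside Java. A minimal shell sketch, assuming GNU coreutils' sha512sum and purely illustrative values for the URL, user, timestamp, and key:

    BASE="http://localhost:8080/api/test1"
    KEY="abracadabara open sesame"
    # the until value must use Java's LocalDateTime.toString() format
    TO_SIGN="${BASE}?until=2022-08-08T12:00&user=Alice&method=GET&token="
    # sha512 hex of (string-to-sign + key), mirroring DigestUtils.sha512Hex(...)
    TOKEN=$(printf '%s' "${TO_SIGN}${KEY}" | sha512sum | awk '{print $1}')
    echo "${TO_SIGN}${TOKEN}"

Validation on the Dataverse side simply recomputes the digest over everything up to and including "token=" and compares, which is why the test cases above that alter the user or method inside the URL fail.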
From 0124da0e8263698be7e04ee96e4f5b61f93ea08e Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 8 Aug 2022 11:58:25 +0200 Subject: [PATCH 105/608] added sortOrder column in the license test file --- src/test/resources/json/license.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/resources/json/license.json b/src/test/resources/json/license.json index dd251322110..00502ded9a6 100644 --- a/src/test/resources/json/license.json +++ b/src/test/resources/json/license.json @@ -3,5 +3,6 @@ "uri": "http://dataverse..org/licenses/test/1.0", "iconUrl": "http://dataverse.org/licenses/test/1.0/icon.png", "shortDescription": "Dataverse Test License v1.0.", - "active": false + "active": false, + "sortOrder": 1 } From 5c7674c3c2f61de4e65b90b8d8e20db382127dc5 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 8 Aug 2022 12:10:32 +0200 Subject: [PATCH 106/608] added incompatibilities mention for the license sorting order field --- doc/release-notes/8671-sorting-licenses.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/8671-sorting-licenses.md b/doc/release-notes/8671-sorting-licenses.md index 34ad697d5a7..4ceb9ec056f 100644 --- a/doc/release-notes/8671-sorting-licenses.md +++ b/doc/release-notes/8671-sorting-licenses.md @@ -1,3 +1,7 @@ ## License sorting -Licenses as shown in the dropdown in UI can be now sorted by the superusers. See [Configuring Licenses](https://guides.dataverse.org/en/5.10/installation/config.html#configuring-licenses) section of the Installation Guide for reference. \ No newline at end of file +Licenses as shown in the dropdown in the UI can now be sorted by superusers. See the [Configuring Licenses](https://guides.dataverse.org/en/5.10/installation/config.html#configuring-licenses) section of the Installation Guide for reference. + +## Backward Incompatibilities + +License files are now required to contain the new "sortOrder" column. When attempting to create a new license without this field, an error will be returned. See the [Configuring Licenses](https://guides.dataverse.org/en/5.10/installation/config.html#configuring-licenses) section of the Installation Guide for reference. \ No newline at end of file From 9ad8d6401eb0aa25fe4808e76322421376aa924b Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 8 Aug 2022 12:13:38 +0200 Subject: [PATCH 107/608] renamed: V5.11.0.1__8671-sorting_licenses.sql -> V5.11.1.2__8671-sorting_licenses.sql --- ...-sorting_licenses.sql => V5.11.1.2__8671-sorting_licenses.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.11.0.1__8671-sorting_licenses.sql => V5.11.1.2__8671-sorting_licenses.sql} (100%) diff --git a/src/main/resources/db/migration/V5.11.0.1__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.11.1.2__8671-sorting_licenses.sql similarity index 100% rename from src/main/resources/db/migration/V5.11.0.1__8671-sorting_licenses.sql rename to src/main/resources/db/migration/V5.11.1.2__8671-sorting_licenses.sql From ca0bac2a10828207c4ec9c00e0ebdfddd85339f2 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 8 Aug 2022 15:24:14 +0200 Subject: [PATCH 108/608] added sortOrder also in the error test license --- pom.xml | 2 +- src/test/resources/json/licenseError.json | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index eab64e522a5..19bdee46127 100644 --- a/pom.xml +++ b/pom.xml @@ -558,7 +558,7 @@ com.jayway.restassured rest-assured - 2.4.0 + 2.9.0 test diff --git a/src/test/resources/json/licenseError.json b/src/test/resources/json/licenseError.json index 552b6acadfb..533aa7ce7dc 100644 --- a/src/test/resources/json/licenseError.json +++ b/src/test/resources/json/licenseError.json @@ -4,5 +4,6 @@ "uri": "http://dataverse..org/licenses/test/ln6", "iconUrl": "http://dataverse.org/licenses/test/ln6/icon.png", "shortDescription": "A License that must have id 6.", - "active": true + "active": true, + "sortOrder": 1 } From 2af5cb31d1864588bfe7129c54bd61a7e653e55f Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 8 Aug 2022 15:26:00 +0200 Subject: [PATCH 109/608] revert accidentally committed change in pom.xml --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 19bdee46127..eab64e522a5 100644 --- a/pom.xml +++ b/pom.xml @@ -558,7 +558,7 @@ com.jayway.restassured rest-assured - 2.9.0 + 2.4.0 test From 255e30865162b649283737d585f7441103919715 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 8 Aug 2022 17:18:11 +0200 Subject: [PATCH 110/608] test license created last
should be at the end of the list now because of the increased sortOrder --- src/test/resources/json/license.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/resources/json/license.json b/src/test/resources/json/license.json index 00502ded9a6..d126b1d2280 100644 --- a/src/test/resources/json/license.json +++ b/src/test/resources/json/license.json @@ -4,5 +4,5 @@ "iconUrl": "http://dataverse.org/licenses/test/1.0/icon.png", "shortDescription": "Dataverse Test License v1.0.", "active": false, - "sortOrder": 1 + "sortOrder": 1000 } From 03c242bbc6d1213baa4d3ebb1225147999ff376f Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 8 Aug 2022 14:28:23 -0400 Subject: [PATCH 111/608] license internationalization - first commit --- .../iq/dataverse/dataset/DatasetUtil.java | 31 +++++++++++++++---- .../java/propertyFiles/License.properties | 2 ++ .../migration/V4.13.0.1__3575-usernames.sql | 2 +- ...16.0.1__5303-addColumn-to-settingTable.sql | 6 +--- src/main/webapp/dataset-license-terms.xhtml | 4 +-- .../webapp/datasetLicenseInfoFragment.xhtml | 4 +-- 6 files changed, 33 insertions(+), 16 deletions(-) create mode 100644 src/main/java/propertyFiles/License.properties diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index ccf947b8868..c6fc207163c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -24,12 +24,8 @@ import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; +import java.util.*; import java.util.logging.Logger; -import java.util.Base64; -import java.util.HashMap; -import java.util.Map; import javax.imageio.ImageIO; import org.apache.commons.io.IOUtils; import static edu.harvard.iq.dataverse.dataaccess.DataAccess.getStorageIO; @@ -566,7 +562,30 @@ public static String getLicenseIcon(DatasetVersion dsv) { public static String getLicenseDescription(DatasetVersion dsv) { License license = dsv.getTermsOfUseAndAccess().getLicense(); - return license != null ? license.getShortDescription() : BundleUtil.getStringFromBundle("license.custom.description"); + + if (license != null) { + return getLocalizedLicenseDescription(license.getName()) ; + } else { + return BundleUtil.getStringFromBundle("license.custom.description"); + } + } + + public static String getLocalizedLicenseDescription(String licenseName) { + String key = "license." + licenseName.toLowerCase().replace(" ","_") + ".description"; + if (key != null) { + try { + String _description = BundleUtil.getStringFromPropertyFile(key, "License"); + if (_description == null) { + return BundleUtil.getStringFromBundle("license.custom.description"); + } else { + return _description; + } + } catch (MissingResourceException mre) { + return BundleUtil.getStringFromBundle("license.custom.description"); + } + } else { + return BundleUtil.getStringFromBundle("license.custom.description"); + } } public static String getLocaleExternalStatus(String status) { diff --git a/src/main/java/propertyFiles/License.properties b/src/main/java/propertyFiles/License.properties new file mode 100644 index 00000000000..f6def616a04 --- /dev/null +++ b/src/main/java/propertyFiles/License.properties @@ -0,0 +1,2 @@ +license.cc0_1.0.description=Creative Commons CC0 1.0 Universal Public Domain Dedication. 
+license.cc_by_4.0.description=Creative Commons Attribution 4.0 International License. \ No newline at end of file diff --git a/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql b/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql index 0b1804bdfc4..9e35623c455 100644 --- a/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql +++ b/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql @@ -1 +1 @@ -CREATE UNIQUE INDEX index_authenticateduser_lower_useridentifier ON authenticateduser (lower(useridentifier)); +CREATE UNIQUE INDEX IF NOT EXISTS index_authenticateduser_lower_useridentifier ON authenticateduser (lower(useridentifier)); diff --git a/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql b/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql index 8309dacf486..db08efdab7e 100644 --- a/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql +++ b/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql @@ -4,10 +4,6 @@ ALTER TABLE setting ADD COLUMN IF NOT EXISTS ID SERIAL PRIMARY KEY; ALTER TABLE setting ADD COLUMN IF NOT EXISTS lang text; -ALTER TABLE setting - ADD CONSTRAINT non_empty_lang - CHECK (lang <> ''); - -CREATE UNIQUE INDEX unique_settings +CREATE UNIQUE INDEX IF NOT EXISTS unique_settings ON setting (name, coalesce(lang, '')); diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index 38f1f38e7d6..b81fed8a6d7 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -55,8 +55,8 @@

- - #{termsOfUseAndAccess.license.name} + + #{termsOfUseAndAccess.license.name}

diff --git a/src/main/webapp/datasetLicenseInfoFragment.xhtml b/src/main/webapp/datasetLicenseInfoFragment.xhtml index 554a3d95abf..e5d10c745dd 100644 --- a/src/main/webapp/datasetLicenseInfoFragment.xhtml +++ b/src/main/webapp/datasetLicenseInfoFragment.xhtml @@ -30,12 +30,12 @@ xmlns:jsf="http://xmlns.jcp.org/jsf">
+ jsf:rendered="#{!empty DatasetUtil:getLocalizedLicenseDescription(DatasetPage.workingVersion.termsOfUseAndAccess.license.name)} }">
- +
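A hedged sketch of the lookup-key rule that getLocalizedLicenseDescription() above relies on ("license." + the license name lower-cased with spaces turned into underscores + ".description"); the license name here is only an example:

    name="CC BY 4.0"
    # lower-case the name and replace spaces with underscores, as in the Java code
    key="license.$(printf '%s' "$name" | tr '[:upper:]' '[:lower:]' | tr ' ' '_').description"
    echo "$key"   # -> license.cc_by_4.0.description, matching License.properties above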
From 6f0c2600b6ea8a7ed7091f8a4e9698e1435b9391 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 Aug 2022 16:39:28 -0400 Subject: [PATCH 112/608] improve error handling --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index ef08444af69..a1d0ac86e8c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1813,6 +1813,9 @@ public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, Dataset ds = findDatasetOrDie(dsid); DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); + if(dv==null) { + return error(Status.BAD_REQUEST, "Requested version not found."); + } if (dv.getArchivalCopyLocation() == null) { String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); // Note - the user is being sent via the createDataverseRequest(au) call to the @@ -1858,7 +1861,7 @@ public void run() { return error(Status.BAD_REQUEST, "Version was already submitted for archiving."); } } catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); + return e1.getResponse(); } } @@ -1949,7 +1952,7 @@ public void run() { return error(Status.BAD_REQUEST, "No unarchived published dataset versions found"); } } catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); + return e1.getResponse(); } } From 1aa61ea46ae35ca547cbd7fd801b991914967d52 Mon Sep 17 00:00:00 2001 From: xflv Date: Thu, 11 Aug 2022 11:23:02 +0800 Subject: [PATCH 113/608] To resolve #8838 (add CSTR to the Harvard Dataverse Related Publication ID Type list), we used the existing arXiv entry as a reference. Please review the code and make any needed changes.
--- .../source/_static/api/ddi_dataset.xml | 12 +++++++++ ...dataset-create-new-all-default-fields.json | 26 +++++++++++++++++++ scripts/api/data/metadatablocks/citation.tsv | 3 ++- scripts/issues/2102/ready-state.sql | 2 ++ .../export/openaire/OpenAireExportUtil.java | 1 + .../java/propertyFiles/citation.properties | 1 + .../export/OpenAireExportUtilTest.java | 3 +++ .../export/SchemaDotOrgExporterTest.java | 3 ++- .../dataverse/export/dataset-all-defaults.txt | 26 +++++++++++++++++++ .../dataset-create-new-all-ddi-fields.json | 26 +++++++++++++++++++ .../iq/dataverse/export/ddi/exportfull.xml | 12 +++++++++ 11 files changed, 113 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml index 79e0581131e..8b5dddacb64 100644 --- a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml +++ b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml @@ -163,8 +163,10 @@ RelatedMaterial1 RelatedMaterial2 + RelatedMaterial3 RelatedDatasets1 RelatedDatasets2 + RelatedDatasets3 @@ -183,8 +185,18 @@ + + + + RelatedPublicationIDNumber3 + + RelatedPublicationCitation3 + + + OtherReferences1 OtherReferences2 + OtherReferences3 StudyLevelErrorNotes diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json index d7ae8cefbf7..e51f4d9e1b5 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -369,6 +369,32 @@ "typeClass": "primitive", "value": "http://RelatedPublicationURL2.org" } + }, + { + "publicationCitation": { + "typeName": "publicationCitation", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationCitation3" + }, + "publicationIDType": { + "typeName": "publicationIDType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "CSTR" + }, + "publicationIDNumber": { + "typeName": "publicationIDNumber", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationIDNumber3" + }, + "publicationURL": { + "typeName": "publicationURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://RelatedPublicationURL3.org" + } } ] }, diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 94aa509334f..e17db407ae7 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -110,7 +110,8 @@ publicationIDType purl 13 publicationIDType upc 14 publicationIDType url 15 - publicationIDType urn 16 + publicationIDType urn 16 + publicationIDType CSTR 17 contributorType Data Collector 0 contributorType Data Curator 1 contributorType Data Manager 2 diff --git a/scripts/issues/2102/ready-state.sql b/scripts/issues/2102/ready-state.sql index 96ccf58d865..03ab805531e 100644 --- a/scripts/issues/2102/ready-state.sql +++ b/scripts/issues/2102/ready-state.sql @@ -3284,6 +3284,7 @@ COPY controlledvocabalternate (id, strvalue, controlledvocabularyvalue_id, datas 22 United States of America 472 79 23 U.S.A. 472 79 24 YEMEN 483 79 +25 CSTR 825 30 \. @@ -4116,6 +4117,7 @@ COPY controlledvocabularyvalue (id, displayorder, identifier, strvalue, datasetf 822 29 review article 154 823 30 translation 154 824 31 other 154 +825 17 CSTR 30 \. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 49fe203b96d..ffce432ce3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -964,6 +964,7 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe relatedIdentifierTypeMap.put("URL".toLowerCase(), "URL"); relatedIdentifierTypeMap.put("URN".toLowerCase(), "URN"); relatedIdentifierTypeMap.put("WOS".toLowerCase(), "WOS"); + relatedIdentifierTypeMap.put("CSTR".toLowerCase(), "CSTR"); } for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index bdcc48b5bf1..47356b404b8 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -250,6 +250,7 @@ controlledvocabulary.subject.social_sciences=Social Sciences controlledvocabulary.subject.other=Other controlledvocabulary.publicationIDType.ark=ark controlledvocabulary.publicationIDType.arxiv=arXiv +controlledvocabulary.publicationIDType.CSTR=CSTR controlledvocabulary.publicationIDType.bibcode=bibcode controlledvocabulary.publicationIDType.doi=doi controlledvocabulary.publicationIDType.ean13=ean13 diff --git a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java index 7f7cc203506..8064b8e20f5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java @@ -609,6 +609,9 @@ public void testWriteRelatedIdentifierElement() throws XMLStreamException, IOExc + "RelatedPublicationIDNumber1" + "" + "RelatedPublicationIDNumber2" + + "" + + "" + + "RelatedPublicationIDNumber3" + "", stringWriter.toString()); } diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..5f0d4dfd106 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -408,7 +408,8 @@ private static void mockDatasetFieldSvc() { new ControlledVocabularyValue(3l, "bibcode", publicationIdTypes), new ControlledVocabularyValue(4l, "doi", publicationIdTypes), new ControlledVocabularyValue(5l, "ean13", publicationIdTypes), - new ControlledVocabularyValue(6l, "handle", publicationIdTypes) + new ControlledVocabularyValue(6l, "handle", publicationIdTypes), + new ControlledVocabularyValue(17l, "CSTR", publicationIdTypes) // Etc. There are more. 
)); publicationChildTypes.add(datasetFieldTypeSvc.add(publicationIdTypes)); diff --git a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt index a3f0dffc767..a2a6d9c0778 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt +++ b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt @@ -362,6 +362,32 @@ "typeClass": "primitive", "value": "http://RelatedPublicationURL2.org" } + }, + { + "publicationCitation": { + "typeName": "publicationCitation", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationCitation3" + }, + "publicationIDType": { + "typeName": "publicationIDType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "CSTR" + }, + "publicationIDNumber": { + "typeName": "publicationIDNumber", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationIDNumber3" + }, + "publicationURL": { + "typeName": "publicationURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://RelatedPublicationURL3.org" + } } ] }, diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json index 1b327c15496..362a4ae4d90 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json @@ -321,6 +321,32 @@ "typeClass": "primitive", "value": "http://RelatedPublicationURL2.org" } + }, + { + "publicationCitation": { + "typeName": "publicationCitation", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationCitation3" + }, + "publicationIDType": { + "typeName": "publicationIDType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "CSTR" + }, + "publicationIDNumber": { + "typeName": "publicationIDNumber", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationIDNumber3" + }, + "publicationURL": { + "typeName": "publicationURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://RelatedPublicationURL3.org" + } } ] }, diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml b/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml index 0570c832e4f..4314775c7a2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml @@ -166,8 +166,10 @@ RelatedMaterial1 RelatedMaterial2 + RelatedMaterial3 RelatedDatasets1 RelatedDatasets2 + RelatedDatasets3 @@ -185,9 +187,19 @@ RelatedPublicationCitation2 + + + + + RelatedPublicationIDNumber3 + + RelatedPublicationCitation3 + + OtherReferences1 OtherReferences2 + OtherReferences3 StudyLevelErrorNotes From 867c8b85778659dfd5f9435be7029e72256d2c95 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 15:35:46 +0200 Subject: [PATCH 114/608] fix(jpa): introduce real defaults for DataSourceDefinition via MPCONFIG. #7980 Previously, with Dataverse software 5.3, the option to configure the database connection has been moved into the codebase. Admins can set details via MicroProfile Config. With updating to Payara 5.2021.4, we can provide default values for the connection details. Before, this had been tried with adding them to META-INF/microprofile-config.properties. 
However, this is not possible due to the timing of resource creation in the application server vs. reading the properties file. IQSS/dataverse#7980 --- .../edu/harvard/iq/dataverse/util/DataSourceProducer.java | 8 ++++++-- .../resources/META-INF/microprofile-config.properties | 6 +----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 630f192890b..322542c10d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -16,9 +16,13 @@ // HINT: PGSimpleDataSource would work too, but as we use a connection pool, go with a javax.sql.ConnectionPoolDataSource // HINT: PGXADataSource is unnecessary (no distributed transactions used) and breaks ingest. className = "org.postgresql.ds.PGConnectionPoolDataSource", - user = "${MPCONFIG=dataverse.db.user}", + + // BEWARE: as this resource is created before defaults are read from META-INF/microprofile-config.properties, + // defaults must be provided in this Payara-proprietary manner. + user = "${MPCONFIG=dataverse.db.user:dataverse}", password = "${MPCONFIG=dataverse.db.password}", - url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host}:${MPCONFIG=dataverse.db.port}/${MPCONFIG=dataverse.db.name}", + url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host:localhost}:${MPCONFIG=dataverse.db.port:5432}/${MPCONFIG=dataverse.db.name:dataverse}", + // If we ever need to change these pool settings, we need to remove this class and create the resource // from web.xml. We can use MicroProfile Config in there for these values, impossible to do in the annotation. // diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 09d71dfbf3a..9e5d126d305 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -1,5 +1 @@ -# DATABASE -dataverse.db.host=localhost -dataverse.db.port=5432 -dataverse.db.user=dataverse -dataverse.db.name=dataverse +# Entries use key=value From 9e16306fb6731493b778abcbf933f1cd20bcb099 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 16:49:19 +0200 Subject: [PATCH 115/608] feat(jpa): add advanced pool config options to DataSourceDefinition #7980 --- .../iq/dataverse/util/DataSourceProducer.java | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 322542c10d6..52860971243 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -34,18 +34,36 @@ maxPoolSize = 100, // "The number of seconds that a physical connection should remain unused in the pool before the connection is closed for a connection pool. " // Payara DataSourceDefinitionDeployer default value = 300 (seconds) - maxIdleTime = 300) -// It's possible to add additional properties like this... -// -//properties = { -// "fish.payara.log-jdbc-calls=true" -//}) -// -// ... but at this time we don't think we need any. 
The full list -// of properties can be found at https://docs.payara.fish/community/docs/5.2021.6/documentation/payara-server/jdbc/advanced-connection-pool-properties.html#full-list-of-properties -// -// All these properties cannot be configured via MPCONFIG as Payara doesn't support this (yet). To be enhanced. -// See also https://github.com/payara/Payara/issues/5024 + maxIdleTime = 300, + + // Set more options via MPCONFIG, including defaults where applicable. + // TODO: Future versions of Payara might support setting integer properties like pool size, + // idle times, etc. in a Payara-proprietary way. See https://github.com/payara/Payara/pull/5272 + properties = { + // The following options are documented here: + // https://docs.payara.fish/community/docs/documentation/payara-server/jdbc/advanced-connection-pool-properties.html + "fish.payara.is-connection-validation-required=${MPCONFIG=dataverse.db.is-connection-validation-required:false}", + "fish.payara.connection-validation-method=${MPCONFIG=dataverse.db.connection-validation-method}", + "fish.payara.validation-table-name=${MPCONFIG=dataverse.db.validation-table-name}", + "fish.payara.validation-classname=${MPCONFIG=dataverse.db.validation-classname}", + "fish.payara.validate-atmost-once-period-in-seconds=${MPCONFIG=dataverse.db.validate-atmost-once-period-in-seconds:0}", + "fish.payara.connection-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.connection-leak-timeout-in-seconds:0}", + "fish.payara.connection-leak-reclaim=${MPCONFIG=dataverse.db.connection-leak-reclaim:false}", + "fish.payara.connection-creation-retry-attempts=${MPCONFIG=dataverse.db.connection-creation-retry-attempts:0}", + "fish.payara.connection-creation-retry-interval-in-seconds=${MPCONFIG=dataverse.db.connection-creation-retry-interval-in-seconds:10}", + "fish.payara.statement-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-timeout-in-seconds:-1}", + "fish.payara.lazy-connection-enlistment=${MPCONFIG=dataverse.db.lazy-connection-enlistment:false}", + "fish.payara.lazy-connection-association=${MPCONFIG=dataverse.db.lazy-connection-association:false}", + "fish.payara.pooling=${MPCONFIG=dataverse.db.pooling:true}", + "fish.payara.statement-cache-size=${MPCONFIG=dataverse.db.statement-cache-size:0}", + "fish.payara.match-connections=${MPCONFIG=dataverse.db.match-connections:true}", + "fish.payara.max-connection-usage-count=${MPCONFIG=dataverse.db.max-connection-usage-count:0}", + "fish.payara.statement-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-leak-timeout-in-seconds:0}", + "fish.payara.statement-leak-reclaim=${MPCONFIG=dataverse.db.statement-leak-reclaim:false}", + "fish.payara.statement-cache-type=${MPCONFIG=dataverse.db.statement-cache-type}", + "fish.payara.slow-query-threshold-in-seconds=${MPCONFIG=dataverse.db.slow-query-threshold-in-seconds:-1}", + "fish.payara.log-jdbc-calls=${MPCONFIG=dataverse.db.log-jdbc-calls:false}" + }) public class DataSourceProducer { @Resource(lookup = "java:app/jdbc/dataverse")
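Because every pool option above is resolved through MicroProfile Config, an admin can toggle them without code changes. A hedged shell sketch, assuming the environment-variable mapping documented in the next patch (dots and dashes become underscores, names are upper-cased) and that the variables are exported in the environment that starts Payara:

    # enable JDBC call tracing and flag queries slower than 5 seconds
    export DATAVERSE_DB_LOG_JDBC_CALLS=true
    export DATAVERSE_DB_SLOW_QUERY_THRESHOLD_IN_SECONDS=5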
From 11d6258a034f78a0505007cc24c34cad19d6e59e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 19:57:25 +0200 Subject: [PATCH 116/608] docs(jpa): add extensive docs about database connection configuration. #7980 --- .../source/installation/config.rst | 193 +++++++++++++----- 1 file changed, 142 insertions(+), 51 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a70dcd4e8db..14c52ed0a35 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -238,6 +238,148 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. + + +Database Persistence +-------------------- + +The Dataverse software uses a PostgreSQL server and a Solr Search Index to store objects users create. +You can configure basic and advanced settings of the PostgreSQL database connection with the help of +the MicroProfile Config API. + +Basic Database Settings ++++++++++++++++++++++++ + +1. Any of these settings can be set via system properties (see :ref:`jvm-options`), environment variables or other + MicroProfile Config mechanisms supported by the appserver. + `See Payara docs for supported sources `_. +2. Remember to protect your secrets. For passwords, use an environment variable (bare minimum), a password alias named the same + as the key (OK) or use the "dir config source" of Payara (best). + + Alias creation example: + + .. code-block:: shell + + echo "AS_ADMIN_ALIASPASSWORD=changeme" > /tmp/p.txt + asadmin create-password-alias --passwordfile /tmp/p.txt dataverse.db.password + rm /tmp/p.txt + +3. Environment variables follow the key, replacing any dot, colon, dash, etc. with an underscore "_" and all uppercase + letters. Example: ``dataverse.db.host`` -> ``DATAVERSE_DB_HOST`` + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.host + - The PostgreSQL server to connect to. + - ``localhost`` + * - dataverse.db.port + - The PostgreSQL server port to connect to. + - ``5432`` + * - dataverse.db.user + - The PostgreSQL user name to connect with. + - | ``dataverse`` + | (installer sets to ``dvnapp``) + * - dataverse.db.password + - The PostgreSQL user's password to connect with. + + **Please note the safety advisory above.** + - *No default* + * - dataverse.db.name + - The PostgreSQL database name to use for the Dataverse installation. + - | ``dataverse`` + | (installer sets to ``dvndb``) + +Advanced Database Settings ++++++++++++++++++++++++++ + +The following options are useful in many scenarios.
You might be interested in debug output during development or +monitoring performance in production. + +You can find more details within the +`Payara docs on Advanced Connection Pool Configuration `_. + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.is-connection-validation-required + - ``true``: Validate connections, allow server to reconnect in case of failure + - false + * - dataverse.db.connection-validation-method + - | The method of connection validation: + | ``table|autocommit|meta-data|custom-validation`` + - *No default* + * - dataverse.db.validation-table-name + - The name of the table used for validation if the validation method is set to ``table`` + - *No default* + * - dataverse.db.validation-classname + - The name of the custom class used for validation if the ``validation-method`` is set to ``custom-validation`` + - *No default* + * - dataverse.db.validate-atmost-once-period-in-seconds + - Specifies the time interval in seconds between successive requests to validate a connection at most once. + - ``0`` (disabled) + * - dataverse.db.connection-leak-timeout-in-seconds + - Specify timeout when connections count as "leaked". + - ``0`` (disabled) + * - dataverse.db.connection-leak-reclaim + - If enabled, leaked connection will be reclaimed by the pool after connection leak timeout occurs. + - ``false`` + * - dataverse.db.connection-creation-retry-attempts + - Number of attempts to create a new connection. + - ``0`` (no retries) + * - dataverse.db.connection-creation-retry-interval-in-seconds + - Time interval between retries while attempting to create a connection. Effective when "Creation Retry Attempts" is ``> 0``. + - ``10`` + * - dataverse.db.statement-timeout-in-seconds + - Timeout property of a connection to enable termination of abnormally long running queries. + - ``-1`` (disabled) + * - dataverse.db.lazy-connection-enlistment + - Enlist a resource to the transaction only when it is actually used in a method + - ``false`` + * - dataverse.db.lazy-connection-association + - Connections are lazily associated when an operation is performed on them + - ``false`` + * - dataverse.db.pooling + - When set to false, disables connection pooling for the pool + - ``true`` (enabled) + * - dataverse.db.statement-cache-size + - Caching is enabled when set to a positive non-zero value (for example, 10) + - ``0`` + * - dataverse.db.match-connections + - Turns connection matching for the pool on or off + - ``true`` + * - dataverse.db.max-connection-usage-count + - Connections will be reused by the pool for the specified number of times, after which they will be closed. + - ``0`` (disabled) + * - dataverse.db.statement-leak-timeout-in-seconds + - Specifiy timeout when statements should be considered to be "leaked" + - ``0`` (disabled) + * - dataverse.db.statement-leak-reclaim + - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs + - ``false`` + * - dataverse.db.statement-cache-type + - + - + * - dataverse.db.slow-query-threshold-in-seconds + - SQL queries that exceed this time in seconds will be logged. 
+ - ``-1`` (disabled) + * - dataverse.db.log-jdbc-calls + - When set to true, all JDBC calls will be logged allowing tracing of all JDBC interactions including SQL + - ``false`` + + + + File Storage: Using a Local Filesystem and/or Swift and/or object stores ------------------------------------------------------------------------ @@ -1373,57 +1515,6 @@ dataverse.auth.password-reset-timeout-in-minutes Users have 60 minutes to change their passwords by default. You can adjust this value here. -dataverse.db.name -+++++++++++++++++ - -The PostgreSQL database name to use for the Dataverse installation. - -Defaults to ``dataverse`` (but the installer sets it to ``dvndb``). - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_NAME``. - -dataverse.db.user -+++++++++++++++++ - -The PostgreSQL user name to connect with. - -Defaults to ``dataverse`` (but the installer sets it to ``dvnapp``). - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_USER``. - -dataverse.db.password -+++++++++++++++++++++ - -The PostgreSQL users password to connect with. - -Preferrably use a JVM alias, as passwords in environment variables aren't safe. - -.. code-block:: shell - - echo "AS_ADMIN_ALIASPASSWORD=change-me-super-secret" > /tmp/password.txt - asadmin create-password-alias --passwordfile /tmp/password.txt dataverse.db.password - rm /tmp/password.txt - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_PASSWORD``. - -dataverse.db.host -+++++++++++++++++ - -The PostgreSQL server to connect to. - -Defaults to ``localhost``. - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_HOST``. - -dataverse.db.port -+++++++++++++++++ - -The PostgreSQL server port to connect to. - -Defaults to ``5432``, the default PostgreSQL port. - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_PORT``. - dataverse.rserve.host +++++++++++++++++++++ From 8df07cf21c85827e64c9114dd553757299249cbf Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 20:41:33 +0200 Subject: [PATCH 117/608] feat(jpa): remove exotic database connection options. #7980 As requested by @pdurbin, the long list was quite overwhelming. It's now damped down to 12 options in 3 subsubsections of the docs. --- .../source/installation/config.rst | 67 ++++++++++--------- .../iq/dataverse/util/DataSourceProducer.java | 14 ++-- 2 files changed, 38 insertions(+), 43 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 14c52ed0a35..69ec666ff16 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -301,8 +301,13 @@ Advanced Database Settings The following options are useful in many scenarios. You might be interested in debug output during development or monitoring performance in production. -You can find more details within the -`Payara docs on Advanced Connection Pool Configuration `_. +You can find more details within the Payara docs: + +- `User Guide: Connection Pool Configuration `_ +- `Tech Doc: Advanced Connection Pool Configuration `_. + +Connection Validation +^^^^^^^^^^^^^^^^^^^^^ .. 
list-table:: :widths: 15 60 25 @@ -328,48 +333,45 @@ You can find more details within the * - dataverse.db.validate-atmost-once-period-in-seconds - Specifies the time interval in seconds between successive requests to validate a connection at most once. - ``0`` (disabled) + +Connection & Statement Leaks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default * - dataverse.db.connection-leak-timeout-in-seconds - Specify timeout when connections count as "leaked". - ``0`` (disabled) * - dataverse.db.connection-leak-reclaim - If enabled, leaked connection will be reclaimed by the pool after connection leak timeout occurs. - ``false`` - * - dataverse.db.connection-creation-retry-attempts - - Number of attempts to create a new connection. - - ``0`` (no retries) - * - dataverse.db.connection-creation-retry-interval-in-seconds - - Time interval between retries while attempting to create a connection. Effective when "Creation Retry Attempts" is ``> 0``. - - ``10`` - * - dataverse.db.statement-timeout-in-seconds - - Timeout property of a connection to enable termination of abnormally long running queries. - - ``-1`` (disabled) - * - dataverse.db.lazy-connection-enlistment - - Enlist a resource to the transaction only when it is actually used in a method - - ``false`` - * - dataverse.db.lazy-connection-association - - Connections are lazily associated when an operation is performed on them - - ``false`` - * - dataverse.db.pooling - - When set to false, disables connection pooling for the pool - - ``true`` (enabled) - * - dataverse.db.statement-cache-size - - Caching is enabled when set to a positive non-zero value (for example, 10) - - ``0`` - * - dataverse.db.match-connections - - Turns connection matching for the pool on or off - - ``true`` - * - dataverse.db.max-connection-usage-count - - Connections will be reused by the pool for the specified number of times, after which they will be closed. - - ``0`` (disabled) * - dataverse.db.statement-leak-timeout-in-seconds - Specify timeout when statements should be considered to be "leaked" - ``0`` (disabled) * - dataverse.db.statement-leak-reclaim - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs - ``false`` - * - dataverse.db.statement-cache-type - - - - + +Logging & Slow Performance +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.statement-timeout-in-seconds + - Timeout property of a connection to enable termination of abnormally long running queries. + - ``-1`` (disabled) * - dataverse.db.slow-query-threshold-in-seconds - SQL queries that exceed this time in seconds will be logged.
- ``-1`` (disabled) @@ -379,7 +381,6 @@ You can find more details within the - File Storage: Using a Local Filesystem and/or Swift and/or object stores ------------------------------------------------------------------------ diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 52860971243..75b892064f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -42,25 +42,19 @@ properties = { // The following options are documented here: // https://docs.payara.fish/community/docs/documentation/payara-server/jdbc/advanced-connection-pool-properties.html + // VALIDATION "fish.payara.is-connection-validation-required=${MPCONFIG=dataverse.db.is-connection-validation-required:false}", "fish.payara.connection-validation-method=${MPCONFIG=dataverse.db.connection-validation-method}", "fish.payara.validation-table-name=${MPCONFIG=dataverse.db.validation-table-name}", "fish.payara.validation-classname=${MPCONFIG=dataverse.db.validation-classname}", "fish.payara.validate-atmost-once-period-in-seconds=${MPCONFIG=dataverse.db.validate-atmost-once-period-in-seconds:0}", + // LEAK DETECTION "fish.payara.connection-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.connection-leak-timeout-in-seconds:0}", "fish.payara.connection-leak-reclaim=${MPCONFIG=dataverse.db.connection-leak-reclaim:false}", - "fish.payara.connection-creation-retry-attempts=${MPCONFIG=dataverse.db.connection-creation-retry-attempts:0}", - "fish.payara.connection-creation-retry-interval-in-seconds=${MPCONFIG=dataverse.db.connection-creation-retry-interval-in-seconds:10}", - "fish.payara.statement-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-timeout-in-seconds:-1}", - "fish.payara.lazy-connection-enlistment=${MPCONFIG=dataverse.db.lazy-connection-enlistment:false}", - "fish.payara.lazy-connection-association=${MPCONFIG=dataverse.db.lazy-connection-association:false}", - "fish.payara.pooling=${MPCONFIG=dataverse.db.pooling:true}", - "fish.payara.statement-cache-size=${MPCONFIG=dataverse.db.statement-cache-size:0}", - "fish.payara.match-connections=${MPCONFIG=dataverse.db.match-connections:true}", - "fish.payara.max-connection-usage-count=${MPCONFIG=dataverse.db.max-connection-usage-count:0}", "fish.payara.statement-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-leak-timeout-in-seconds:0}", "fish.payara.statement-leak-reclaim=${MPCONFIG=dataverse.db.statement-leak-reclaim:false}", - "fish.payara.statement-cache-type=${MPCONFIG=dataverse.db.statement-cache-type}", + // LOGGING, SLOWNESS, PERFORMANCE + "fish.payara.statement-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-timeout-in-seconds:-1}", "fish.payara.slow-query-threshold-in-seconds=${MPCONFIG=dataverse.db.slow-query-threshold-in-seconds:-1}", "fish.payara.log-jdbc-calls=${MPCONFIG=dataverse.db.log-jdbc-calls:false}" }) From 59dbdd84b32691b4cc9c1de0c362794aef24c804 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 20:54:10 +0200 Subject: [PATCH 118/608] docs(jpa): add release note for 5.6 about #7980 --- doc/release-notes/7980-enhanced-dsd.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 doc/release-notes/7980-enhanced-dsd.md diff --git a/doc/release-notes/7980-enhanced-dsd.md b/doc/release-notes/7980-enhanced-dsd.md new file mode 100644 index 00000000000..6a86a2c4b37 --- /dev/null +++ 
b/doc/release-notes/7980-enhanced-dsd.md @@ -0,0 +1,10 @@ +### Default Values for Database Connections Fixed +
+A regression introduced in Dataverse release 5.3 might have affected you: +the announced default values for the database connection never actually worked. +
+With the update to Payara 5.2022.3 it was possible to introduce working +defaults. The documentation has been changed accordingly. +
+Together with this change, you can now enable advanced connection pool +configurations useful for debugging and monitoring. See the docs for details. \ No newline at end of file

From 8a1ee7e3af9a8d7e470c75249d421a4e292d532a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Aug 2022 23:42:54 +0200 Subject: [PATCH 119/608] feat(jpa): add JDBC connection parameter setting #7980

To avoid hacky parameter additions via the database name, this commit adds support for adding parameters to the JDBC URL. It defaults to empty (no parameters). --- doc/sphinx-guides/source/installation/config.rst | 4 ++++ .../edu/harvard/iq/dataverse/util/DataSourceProducer.java | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 69ec666ff16..269b9eeee55 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -294,6 +294,10 @@ Basic Database Settings - The PostgreSQL database name to use for the Dataverse installation. - | ``dataverse`` | (installer sets to ``dvndb``) + * - dataverse.db.parameters + - Connection parameters, see `Postgres JDBC docs `_ Note: you don't need to provide the initial "?". + - *Empty string* Advanced Database Settings ++++++++++++++++++++++++++ diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 75b892064f5..4cb0f49190b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -21,7 +21,7 @@ // defaults must be provided in this Payara-proprietary manner. user = "${MPCONFIG=dataverse.db.user:dataverse}", password = "${MPCONFIG=dataverse.db.password}", - url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host:localhost}:${MPCONFIG=dataverse.db.port:5432}/${MPCONFIG=dataverse.db.name:dataverse}", + url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host:localhost}:${MPCONFIG=dataverse.db.port:5432}/${MPCONFIG=dataverse.db.name:dataverse}?${MPCONFIG=dataverse.db.parameters:}", // If we ever need to change these pool settings, we need to remove this class and create the resource // from web.xml. We can use MicroProfile Config in there for these values, impossible to do in the annotation.

From e81b20c38c1f23bdd7c6cf2a347ddbf72d7db450 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Aug 2022 23:45:02 +0200 Subject: [PATCH 120/608] fix(jpa): make advanced JDBC options not log warnings #7000

With the addition of the advanced (but proprietary, Payara-only) settings for database connection monitoring, the non-present default for connection validation triggered unnecessary log clutter. Adding an empty default makes these go away and is in line with the default of Payara. 
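The MPCONFIG default syntax used in the diff below behaves like an optional lookup with an empty-string fallback. A minimal sketch of that pattern, assuming only the MicroProfile Config API on the classpath; the property name is one of the real keys touched here, while the class name is illustrative:

    import org.eclipse.microprofile.config.ConfigProvider;

    public class EmptyDefaultExample {
        public static void main(String[] args) {
            // ${MPCONFIG=dataverse.db.connection-validation-method:} resolves to ""
            // when the property is absent, instead of logging a warning about a
            // missing value.
            String method = ConfigProvider.getConfig()
                    .getOptionalValue("dataverse.db.connection-validation-method", String.class)
                    .orElse("");
            System.out.println("validation method: [" + method + "]");
        }
    }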
--- doc/sphinx-guides/source/installation/config.rst | 6 +++--- .../edu/harvard/iq/dataverse/util/DataSourceProducer.java | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 269b9eeee55..04c17298a97 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -327,13 +327,13 @@ Connection Validation * - dataverse.db.connection-validation-method - | The method of connection validation: | ``table|autocommit|meta-data|custom-validation`` - - *No default* + - *Empty string* * - dataverse.db.validation-table-name - The name of the table used for validation if the validation method is set to ``table`` - - *No default* + - *Empty string* * - dataverse.db.validation-classname - The name of the custom class used for validation if the ``validation-method`` is set to ``custom-validation`` - - *No default* + - *Empty string* * - dataverse.db.validate-atmost-once-period-in-seconds - Specifies the time interval in seconds between successive requests to validate a connection at most once. - ``0`` (disabled) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 4cb0f49190b..800c05ae6dc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -44,9 +44,9 @@ // https://docs.payara.fish/community/docs/documentation/payara-server/jdbc/advanced-connection-pool-properties.html // VALIDATION "fish.payara.is-connection-validation-required=${MPCONFIG=dataverse.db.is-connection-validation-required:false}", - "fish.payara.connection-validation-method=${MPCONFIG=dataverse.db.connection-validation-method}", - "fish.payara.validation-table-name=${MPCONFIG=dataverse.db.validation-table-name}", - "fish.payara.validation-classname=${MPCONFIG=dataverse.db.validation-classname}", + "fish.payara.connection-validation-method=${MPCONFIG=dataverse.db.connection-validation-method:}", + "fish.payara.validation-table-name=${MPCONFIG=dataverse.db.validation-table-name:}", + "fish.payara.validation-classname=${MPCONFIG=dataverse.db.validation-classname:}", "fish.payara.validate-atmost-once-period-in-seconds=${MPCONFIG=dataverse.db.validate-atmost-once-period-in-seconds:0}", // LEAK DETECTION "fish.payara.connection-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.connection-leak-timeout-in-seconds:0}", From 8948faa4c7215acfd5bf805c8f4b23477b3cf0d8 Mon Sep 17 00:00:00 2001 From: xflv Date: Mon, 22 Aug 2022 15:21:57 +0800 Subject: [PATCH 121/608] Hi reviewer, We have fixed the lowercase and sequential issues. Please review the code and make the changes. 
--- scripts/api/data/metadatablocks/citation.tsv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index e17db407ae7..7cc14043b2a 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -96,7 +96,8 @@ subject Other D12 13 publicationIDType ark 0 publicationIDType arXiv 1 - publicationIDType bibcode 2 + publicationIDType bibcode 2 + publicationIDType cstr 17 publicationIDType doi 3 publicationIDType ean13 4 publicationIDType eissn 5 @@ -110,8 +111,7 @@ publicationIDType purl 13 publicationIDType upc 14 publicationIDType url 15 - publicationIDType urn 16 - publicationIDType CSTR 17 + publicationIDType urn 16 contributorType Data Collector 0 contributorType Data Curator 1 contributorType Data Manager 2

From 2f664d0886031feee1ea006d0a77b269303ccc1c Mon Sep 17 00:00:00 2001 From: xflv Date: Mon, 29 Aug 2022 14:01:45 +0800 Subject: [PATCH 122/608] We have modified the AdminIT.java file as per example #8775 provided by qqmyers. Please check whether the conflict has been resolved. As we do not have access to the conflict details, if the modification does not resolve the conflict, please follow up with the file name and line number of the conflict and we will follow up with the modification. --- src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index c34ee2dd4bf..df505224817 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -761,7 +761,7 @@ public void testLoadMetadataBlock_NoErrorPath() { Map>> data = JsonPath.from(body).getMap("data"); assertEquals(1, data.size()); List> addedElements = data.get("added"); - assertEquals(321, addedElements.size()); + assertEquals(322, addedElements.size()); Map statistics = new HashMap<>(); for (Map unit : addedElements) { @@ -777,7 +777,7 @@ public void testLoadMetadataBlock_NoErrorPath() { assertEquals(3, statistics.size()); assertEquals(1, (int) statistics.get("MetadataBlock")); assertEquals(78, (int) statistics.get("DatasetField")); - assertEquals(242, (int) statistics.get("Controlled Vocabulary")); + assertEquals(243, (int) statistics.get("Controlled Vocabulary")); } @Test

From b6a526215d014de83d9399cfb345996847b97575 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 6 Sep 2022 17:00:27 -0400 Subject: [PATCH 123/608] cleanup differences with develop --- .../java/edu/harvard/iq/dataverse/api/Admin.java | 2 -- .../edu/harvard/iq/dataverse/api/Datasets.java | 3 --- .../V5.11.0.2__hdc-3b2-template-instructions.sql | 14 -------------- .../impl/DRSSubmitToArchiveCommandTest.java | 15 +++++---------- 4 files changed, 5 insertions(+), 29 deletions(-) delete mode 100644 src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 4fc2d9d88e3..ef08444af69 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -102,8 +102,6 @@ import java.io.IOException; import java.io.OutputStream; -import edu.harvard.iq.dataverse.util.json.JsonUtil; - import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static 
edu.harvard.iq.dataverse.util.json.JsonPrinter.rolesToJson; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 492a12540f0..92cee9fffc9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -58,7 +58,6 @@ import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; -import edu.harvard.iq.dataverse.api.AbstractApiBean.WrappedResponse; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; @@ -101,8 +100,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.StringReader; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; import java.net.URI; import java.sql.Timestamp; import java.text.MessageFormat; diff --git a/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql b/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql deleted file mode 100644 index df1d3068159..00000000000 --- a/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql +++ /dev/null @@ -1,14 +0,0 @@ -ALTER TABLE template ADD COLUMN IF NOT EXISTS instructions TEXT; - -ALTER TABLE dataset ADD COLUMN IF NOT EXISTS template_id BIGINT; - -DO $$ -BEGIN - - BEGIN - ALTER TABLE dataset ADD CONSTRAINT fx_dataset_template_id FOREIGN KEY (template_id) REFERENCES template(id); - EXCEPTION - WHEN duplicate_object THEN RAISE NOTICE 'Table constraint fk_dataset_template_id already exists'; - END; - -END $$; diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java index 62135287d18..ec5f4b8c69d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -1,14 +1,8 @@ package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.branding.BrandingUtil; -import edu.harvard.iq.dataverse.branding.BrandingUtilTest; - import org.erdtman.jcs.JsonCanonicalizer; import org.junit.Assert; import org.junit.Test; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; - import com.auth0.jwt.JWT; import com.auth0.jwt.algorithms.Algorithm; import com.auth0.jwt.interfaces.DecodedJWT; @@ -20,12 +14,12 @@ import java.security.spec.PKCS8EncodedKeySpec; import java.util.Base64; -/** - * - * @author michael - */ + public class DRSSubmitToArchiveCommandTest { + /* Simple test of JWT encode/decode functionality + * + */ @Test public void createJWT() throws CommandException { @@ -118,6 +112,7 @@ public void createJWT() throws CommandException { System.out.println(jwt.getPayload()); } catch (Exception e) { System.out.println(e.getLocalizedMessage()); + //Any exception is a failure, otherwise decoding worked. 
Assert.fail(e.getLocalizedMessage()); } From c4bc6c4879541f134f09b8af2478da531d285a3a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 6 Sep 2022 17:09:45 -0400 Subject: [PATCH 124/608] more cleanup --- .../engine/command/impl/DRSSubmitToArchiveCommandTest.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java index ec5f4b8c69d..9cc9fae67ba 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -10,7 +10,7 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import java.security.KeyFactory; import java.security.interfaces.RSAPrivateKey; -import java.security.interfaces.RSAPublicKey; +//import java.security.interfaces.RSAPublicKey; import java.security.spec.PKCS8EncodedKeySpec; import java.util.Base64; @@ -50,7 +50,6 @@ public void createJWT() throws CommandException { + "J73YadnpU82C+7OnaTTCDVPfXYgPFLpE9xKFKkRFacgUbEnvZ2i0zSUquH0RAyaK" + "tJ0d/dnd5TQUccAZwT8Nrw0="; - //Todo - not in pkcs8 form String pubKeyString = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAs0sI/nP7okc+QDog1rFe" + "acSa3c5Q0VdjyisERgs4b9TBD8EArxaesGUQ4AhOBH6VVLgcjSJ1dXjn6wY8CJca" + "fIb/UT2AgLDwhVeOlS3mbK/BTn76iOiLMGKgd6sHYuTVvgriUS4ExST/O1+RoKCL" @@ -90,7 +89,7 @@ public void createJWT() throws CommandException { KeyFactory keyFactory = KeyFactory.getInstance("RSA"); PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(encoded); RSAPrivateKey privKey = (RSAPrivateKey) keyFactory.generatePrivate(keySpec); - RSAPublicKey publicKey; + //RSAPublicKey publicKey; /* * If public key is needed: encoded = Base64.decodeBase64(publicKeyPEM); * From d74f9d780106d9f474236e942f28e9ca14bcd405 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 7 Sep 2022 10:29:01 -0400 Subject: [PATCH 125/608] Change setting name --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index ecea6f1dcb2..89666f02db2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -61,7 +61,7 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); - private static final String DRS_CONFIG = ":DRSArchivalConfig"; + private static final String DRS_CONFIG = ":DRSArchiverConfig"; private static final String ADMIN_METADATA = "admin_metadata"; private static final String S3_BUCKET_NAME = "s3_bucket_name"; private static final String S3_PATH = "s3_path"; From 727ef9be9bd948f7c9b39855fa2648655439371b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 7 Sep 2022 17:36:04 -0400 Subject: [PATCH 126/608] fix progress during hash calc --- src/main/webapp/resources/js/fileupload.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/resources/js/fileupload.js b/src/main/webapp/resources/js/fileupload.js index 564239ee7ef..a478235c09f 
100644 --- a/src/main/webapp/resources/js/fileupload.js +++ b/src/main/webapp/resources/js/fileupload.js @@ -144,6 +144,7 @@ var fileUpload = class fileUploadClass { async doUpload() { this.state = UploadState.UPLOADING; var thisFile = curFile-1; + this.id=thisFile; //This appears to be the earliest point when the file table has been populated, and, since we don't know how many table entries have had ids added already, we check var filerows = $('.ui-fileupload-files .ui-fileupload-row'); //Add an id attribute to each entry so we can later match progress and errors with the right entry @@ -318,7 +319,7 @@ var fileUpload = class fileUploadClass { if (directUploadReport) { getMD5(this.file, prog => { var current = 1 + prog; - $('progress').attr({ + $('[upid="' + this.id + '"] progress').attr({ value: current, max: 2 });

From aacff77fbcd210b6b212c34796d518d90f6342c5 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 9 Sep 2022 15:45:13 +0200 Subject: [PATCH 127/608] NullPointerException fix when getting notifications with a newly created user --- .../iq/dataverse/authorization/AuthenticationServiceBean.java | 2 ++ .../iq/dataverse/authorization/users/AuthenticatedUser.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java index b242cd2936f..6c401223cd5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java @@ -647,6 +647,8 @@ public AuthenticatedUser createAuthenticatedUser(UserRecordIdentifier userRecord actionLogSvc.log( new ActionLogRecord(ActionLogRecord.ActionType.Auth, "createUser") .setInfo(authenticatedUser.getIdentifier())); + + authenticatedUser.postLoad(); return authenticatedUser; } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java index b2b5fa92e76..84a13104d9d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java @@ -147,7 +147,7 @@ void prePersist() { } @PostLoad - void postLoad() { + public void postLoad() { mutedNotificationsSet = Type.tokenizeToSet(mutedNotifications); mutedEmailsSet = Type.tokenizeToSet(mutedEmails); }

From 22cdaafd9df85267ef758460fd49b3a1d4cb8ef6 Mon Sep 17 00:00:00 2001 From: Bob Treacy Date: Sun, 11 Sep 2022 16:37:07 -0400 Subject: [PATCH 128/608] passes existing query params and signed URLs in POST body as JSON --- .../iq/dataverse/api/AbstractApiBean.java | 4 +- .../edu/harvard/iq/dataverse/api/Users.java | 10 +- .../externaltools/ExternalToolHandler.java | 95 +++++++++++-------- .../iq/dataverse/util/URLTokenUtil.java | 17 ++++ .../iq/dataverse/util/UrlSignerUtil.java | 3 +- 5 files changed, 86 insertions(+), 43 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 7ddde7064fc..bedfac505db 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -441,9 +441,9 @@ private AuthenticatedUser getAuthenticatedUserFromSignedUrl() { String user = httpRequest.getParameter("user"); AuthenticatedUser 
targetUser = authSvc.getAuthenticatedUser(user); String key = System.getProperty(SystemConfig.API_SIGNING_SECRET,"") + authSvc.findApiTokenByUser(targetUser).getTokenString(); - String signedUrl = httpRequest.getRequestURL().toString(); + String signedUrl = httpRequest.getRequestURL().toString()+"?"+httpRequest.getQueryString(); String method = httpRequest.getMethod(); - + String queryString = httpRequest.getQueryString(); boolean validated = UrlSignerUtil.isValidUrl(signedUrl, user, method, key); if (validated){ authUser = targetUser; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Users.java b/src/main/java/edu/harvard/iq/dataverse/api/Users.java index b1177531874..82ab236b92d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Users.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Users.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.logging.Level; import java.util.logging.Logger; import javax.ejb.Stateless; import javax.json.JsonArray; @@ -201,7 +202,14 @@ public Response getAuthenticatedUserByToken() { AuthenticatedUser authenticatedUser = findUserByApiToken(tokenFromRequestAPI); if (authenticatedUser == null) { - return error(Response.Status.BAD_REQUEST, "User with token " + tokenFromRequestAPI + " not found."); + try { + authenticatedUser = findAuthenticatedUserOrDie(); + return ok(json(authenticatedUser)); + } catch (WrappedResponse ex) { + Logger.getLogger(Users.class.getName()).log(Level.SEVERE, null, ex); + return error(Response.Status.BAD_REQUEST, "User with token " + tokenFromRequestAPI + " not found."); + } + } else { return ok(json(authenticatedUser)); } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index 54489953606..59260b82e5e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -4,6 +4,7 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; @@ -19,6 +20,7 @@ import java.net.http.HttpResponse; import java.util.ArrayList; import java.util.List; +import java.util.function.Consumer; import java.util.logging.Level; import java.util.logging.Logger; @@ -90,51 +92,66 @@ public String handleRequest(boolean preview) { JsonString method = obj.getJsonString("httpMethod"); requestMethod = method!=null?method.getString():HttpMethod.GET; JsonArray queryParams = obj.getJsonArray("queryParameters"); - if (queryParams == null || queryParams.isEmpty()) { - return ""; - } List params = new ArrayList<>(); - queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { - queryParam.keySet().forEach((key) -> { - String value = queryParam.getString(key); - String param = getQueryParam(key, value); - if (param != null && !param.isEmpty()) { - params.add(param); - } + if (requestMethod.equals(HttpMethod.GET)) { + if (queryParams == null || queryParams.isEmpty()) { + return ""; + } + queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { + queryParam.keySet().forEach((key) -> { + String value = queryParam.getString(key); + String param = getQueryParam(key, value); + if (param != null && 
!param.isEmpty()) { + params.add(param); + } + }); }); - }); - - String allowedApis; - JsonArrayBuilder jsonArrayBuilder = Json.createArrayBuilder(); - - JsonObject objApis = JsonUtil.getJsonObject(externalTool.getAllowedApiCalls()); - - JsonArray apis = objApis.getJsonArray("apis"); - apis.getValuesAs(JsonObject.class).forEach(((apiObj) -> { - String name = apiObj.getJsonString("name").getString(); - String httpmethod = apiObj.getJsonString("method").getString(); - int timeout = apiObj.getInt("timeOut"); - String urlTemplate = apiObj.getJsonString("urlTemplate").getString(); - logger.fine("URL Template: " + urlTemplate); - String apiPath = replaceTokensWithValues(urlTemplate); - logger.fine("URL WithTokens: " + apiPath); - String url = UrlSignerUtil.signUrl(apiPath, timeout, user, httpmethod, getApiToken().getTokenString()); - logger.fine("Signed URL: " + url); - jsonArrayBuilder.add(Json.createObjectBuilder().add("name", name).add("httpMethod", httpmethod) - .add("signedUrl", url).add("timeOut", timeout)); - })); - JsonArray allowedApiCallsArray = jsonArrayBuilder.build(); - allowedApis = JsonUtil.prettyPrint(allowedApiCallsArray); - logger.fine("Sending these signed URLS: " + allowedApis); - + } + //ToDo - if the allowedApiCalls() are defined, could/should we send them to tools using GET as well? if (requestMethod.equals(HttpMethod.POST)) { + JsonArrayBuilder jsonArrayBuilder = Json.createArrayBuilder(); try { - return postFormData(allowedApis); + queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { + queryParam.keySet().forEach((key) -> { + String value = queryParam.getString(key); + String param = getPostBodyParam(key, value); + if (param != null && !param.isEmpty()) { + params.add(param); + } + }); + }); + String addVal = String.join(",", params); + String kvp = "{\"queryParameters\":{" + addVal; + + String allowedApis; + + JsonObject objApis = JsonUtil.getJsonObject(externalTool.getAllowedApiCalls()); + + JsonArray apis = objApis.getJsonArray("apis"); + apis.getValuesAs(JsonObject.class).forEach(((apiObj) -> { + String name = apiObj.getJsonString("name").getString(); + String httpmethod = apiObj.getJsonString("method").getString(); + int timeout = apiObj.getInt("timeOut"); + String urlTemplate = apiObj.getJsonString("urlTemplate").getString(); + logger.fine("URL Template: " + urlTemplate); + String apiPath = replaceTokensWithValues(urlTemplate); + logger.fine("URL WithTokens: " + apiPath); + String url = UrlSignerUtil.signUrl(apiPath, timeout, user, httpmethod, System.getProperty(SystemConfig.API_SIGNING_SECRET, "") + getApiToken().getTokenString()); + logger.fine("Signed URL: " + url); + jsonArrayBuilder.add(Json.createObjectBuilder().add("name", name).add("httpMethod", httpmethod) + .add("signedUrl", url).add("timeOut", timeout)); + })); + JsonArray allowedApiCallsArray = jsonArrayBuilder.build(); + allowedApis = "\"signedUrls\":" + JsonUtil.prettyPrint(allowedApiCallsArray) + "}"; + logger.fine("Sending these signed URLS: " + allowedApis); + String body = kvp + "}," + allowedApis; + logger.info(body); + return postFormData(body); } catch (IOException | InterruptedException ex) { - Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); - } + Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); + } } if (!preview) { return "?" 
+ String.join("&", params); @@ -162,7 +179,7 @@ private String postFormData(String allowedApis ) throws IOException, Interrupted } if (redirect==true){ String newUrl = response.headers().firstValue("location").get(); - toolContext = "http://" + response.uri().getAuthority(); +// toolContext = "http://" + response.uri().getAuthority(); url = newUrl; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java index 1a1e92a2802..97dcb50dfea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -93,6 +93,23 @@ public String getQueryParam(String key, String value) { } } + + public String getPostBodyParam(String key, String value) { + String tokenValue = null; + tokenValue = getTokenValue(value); + if (tokenValue != null) { + try{ + int x =Integer.parseInt(tokenValue); + return "\""+ key + "\"" + ':' + tokenValue; + } catch (NumberFormatException nfe){ + return "\""+ key + "\"" + ':' + "\"" + tokenValue + "\""; + } + + } else { + return null; + } + } + /** * Tries to replace all occurrences of {} with the value for the * corresponding ReservedWord diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index b11334520e6..85ae4c79190 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -57,6 +57,7 @@ public static String signUrl(String baseUrl, Integer timeout, String user, Strin } signedUrl.append(firstParam ? "?" : "&").append("token="); logger.fine("String to sign: " + signedUrl.toString() + ""); + signedUrl.append(DigestUtils.sha512Hex(signedUrl.toString() + key)); logger.fine("Generated Signed URL: " + signedUrl.toString()); if (logger.isLoggable(Level.FINE)) { @@ -119,7 +120,7 @@ public static boolean isValidUrl(String signedUrl, String user, String method, S // Assuming the token is last - doesn't have to be, but no reason for the URL // params to be rearranged either, and this should only cause false negatives if // it does happen - String urlToHash = signedUrl.substring(0, index + 7); + String urlToHash = signedUrl.substring(0, index + 7).toString(); logger.fine("String to hash: " + urlToHash + ""); String newHash = DigestUtils.sha512Hex(urlToHash + key); logger.fine("Calculated Hash: " + newHash); From d35e1c3ea8e149854598053f9dd0dd35bc9b132b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 12 Sep 2022 09:04:11 -0400 Subject: [PATCH 129/608] Trigger auto-analyze more frequently for guestbook estimates --- .../db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/main/resources/db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql diff --git a/src/main/resources/db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql b/src/main/resources/db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql new file mode 100644 index 00000000000..91ab5253f9c --- /dev/null +++ b/src/main/resources/db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql @@ -0,0 +1 @@ +ALTER TABLE guestbookresponse SET (autovacuum_analyze_scale_factor = 0.01); \ No newline at end of file From 11ff8d7a7a75c9566935e6b2581bb88cdad0529e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 12 Sep 2022 09:29:19 -0400 Subject: [PATCH 130/608] rel note --- 
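A note on the two guestbook patches above: the download-count estimate presumably leans on PostgreSQL's planner statistics for the guestbookresponse table, which the lowered autovacuum_analyze_scale_factor refreshes more often. A minimal JDBC sketch for inspecting that estimate (class name, credentials, and connection URL are illustrative; only pg_class.reltuples comes from PostgreSQL itself):

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;

    public class GuestbookEstimateCheck {
        public static void main(String[] args) throws SQLException {
            try (Connection c = DriverManager.getConnection(
                    "jdbc:postgresql://localhost:5432/dataverse", "dataverse", "secret");
                 Statement s = c.createStatement();
                 // reltuples holds the planner's row count estimate, refreshed by (auto-)ANALYZE
                 ResultSet r = s.executeQuery(
                         "SELECT reltuples::bigint FROM pg_class WHERE relname = 'guestbookresponse'")) {
                if (r.next()) {
                    System.out.println("estimated guestbookresponse rows: " + r.getLong(1));
                }
            }
        }
    }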
doc/release-notes/8840-improved-download-estimate.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/8840-improved-download-estimate.md diff --git a/doc/release-notes/8840-improved-download-estimate.md b/doc/release-notes/8840-improved-download-estimate.md new file mode 100644 index 00000000000..cb264b7e683 --- /dev/null +++ b/doc/release-notes/8840-improved-download-estimate.md @@ -0,0 +1 @@ +To improve performance, Dataverse estimates download counts. This release includes an update that makes the estimate more accurate. \ No newline at end of file From 0bd68429d35e75f2c34e8090fa42dbda082423e4 Mon Sep 17 00:00:00 2001 From: cstr Date: Wed, 14 Sep 2022 08:46:04 +0800 Subject: [PATCH 131/608] Update OpenAireExportUtilTest.java --- .../edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java index 8064b8e20f5..40664527cfc 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java @@ -609,7 +609,6 @@ public void testWriteRelatedIdentifierElement() throws XMLStreamException, IOExc + "RelatedPublicationIDNumber1" + "" + "RelatedPublicationIDNumber2" - + "" + "" + "RelatedPublicationIDNumber3" + "", From 561d8b7fecf9a1c64266bf17e6b1f0923f783e11 Mon Sep 17 00:00:00 2001 From: cstr Date: Wed, 14 Sep 2022 09:02:17 +0800 Subject: [PATCH 132/608] Update SchemaDotOrgExporterTest.java --- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index 5f0d4dfd106..d21d24a5432 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -409,7 +409,7 @@ private static void mockDatasetFieldSvc() { new ControlledVocabularyValue(4l, "doi", publicationIdTypes), new ControlledVocabularyValue(5l, "ean13", publicationIdTypes), new ControlledVocabularyValue(6l, "handle", publicationIdTypes), - new ControlledVocabularyValue(17l, "CSTR", publicationIdTypes) + new ControlledVocabularyValue(17l, "cstr", publicationIdTypes) // Etc. There are more. )); publicationChildTypes.add(datasetFieldTypeSvc.add(publicationIdTypes)); From 39fdcaba2db17e393d388ba97c83682d38ef0f96 Mon Sep 17 00:00:00 2001 From: cstr Date: Wed, 14 Sep 2022 09:12:34 +0800 Subject: [PATCH 133/608] Update ready-state.sql Override --- scripts/issues/2102/ready-state.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/issues/2102/ready-state.sql b/scripts/issues/2102/ready-state.sql index 03ab805531e..96ccf58d865 100644 --- a/scripts/issues/2102/ready-state.sql +++ b/scripts/issues/2102/ready-state.sql @@ -3284,7 +3284,6 @@ COPY controlledvocabalternate (id, strvalue, controlledvocabularyvalue_id, datas 22 United States of America 472 79 23 U.S.A. 472 79 24 YEMEN 483 79 -25 CSTR 825 30 \. @@ -4117,7 +4116,6 @@ COPY controlledvocabularyvalue (id, displayorder, identifier, strvalue, datasetf 822 29 review article 154 823 30 translation 154 824 31 other 154 -825 17 CSTR 30 \. 
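After the vocabulary re-numbering in the surrounding patches, a quick contiguity check on displayorder can catch gaps or duplicates early. A minimal sketch, assuming the publicationIDType field id (30) as it appears in the dump above; the column name datasetfieldtype_id, the class name, and the connection values are assumptions for illustration:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;

    public class DisplayOrderCheck {
        public static void main(String[] args) throws SQLException {
            try (Connection c = DriverManager.getConnection(
                    "jdbc:postgresql://localhost:5432/dataverse", "dataverse", "secret");
                 Statement s = c.createStatement();
                 ResultSet r = s.executeQuery(
                         "SELECT displayorder FROM controlledvocabularyvalue "
                         + "WHERE datasetfieldtype_id = 30 ORDER BY displayorder")) {
                int expected = 0;
                while (r.next()) {
                    int actual = r.getInt(1);
                    if (actual != expected) {
                        System.out.println("gap or duplicate at displayorder " + actual);
                    }
                    expected = actual + 1;
                }
            }
        }
    }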
From ffde80e9b09dec1bd0bfcf793a0ed53320edc332 Mon Sep 17 00:00:00 2001 From: cstr Date: Wed, 14 Sep 2022 09:22:23 +0800 Subject: [PATCH 134/608] Update citation.tsv Adjusted sequence number --- scripts/api/data/metadatablocks/citation.tsv | 32 ++++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index a6d54a9a1ad..1b1ff0ae819 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -97,22 +97,22 @@ publicationIDType ark 0 publicationIDType arXiv 1 publicationIDType bibcode 2 - publicationIDType cstr 17 - publicationIDType doi 3 - publicationIDType ean13 4 - publicationIDType eissn 5 - publicationIDType handle 6 - publicationIDType isbn 7 - publicationIDType issn 8 - publicationIDType istc 9 - publicationIDType lissn 10 - publicationIDType lsid 11 - publicationIDType pmid 12 - publicationIDType purl 13 - publicationIDType upc 14 - publicationIDType url 15 - publicationIDType urn 16 - publicationIDType DASH-NRS 17 + publicationIDType cstr 3 + publicationIDType doi 4 + publicationIDType ean13 5 + publicationIDType eissn 6 + publicationIDType handle 7 + publicationIDType isbn 8 + publicationIDType issn 9 + publicationIDType istc 10 + publicationIDType lissn 11 + publicationIDType lsid 12 + publicationIDType pmid 13 + publicationIDType purl 14 + publicationIDType upc 15 + publicationIDType url 16 + publicationIDType urn 17 + publicationIDType DASH-NRS 18 contributorType Data Collector 0 contributorType Data Curator 1 contributorType Data Manager 2 From 1007a6b1f1af2eb59f56e2a2d595139c064f289f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Sep 2022 13:47:52 -0400 Subject: [PATCH 135/608] Update conf/solr/8.11.1/schema.xml Co-authored-by: Philip Durbin --- conf/solr/8.11.1/schema.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index 78cb0270532..381d72d2756 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -1110,7 +1110,7 @@ --> - + From 7afa2935d72410df2e7f3ad847ceea4037d9244f Mon Sep 17 00:00:00 2001 From: cstr Date: Thu, 15 Sep 2022 17:40:25 +0800 Subject: [PATCH 136/608] Update dataset-create-new-all-ddi-fields.json override --- .../dataset-create-new-all-ddi-fields.json | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json index 362a4ae4d90..1b327c15496 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json @@ -321,32 +321,6 @@ "typeClass": "primitive", "value": "http://RelatedPublicationURL2.org" } - }, - { - "publicationCitation": { - "typeName": "publicationCitation", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationCitation3" - }, - "publicationIDType": { - "typeName": "publicationIDType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "CSTR" - }, - "publicationIDNumber": { - "typeName": "publicationIDNumber", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationIDNumber3" - }, - "publicationURL": { - "typeName": "publicationURL", - "multiple": false, - "typeClass": "primitive", - "value": 
"http://RelatedPublicationURL3.org" - } } ] }, From 627adf7ad565928ef812ef425f32db0eaa07bb34 Mon Sep 17 00:00:00 2001 From: cstr Date: Thu, 15 Sep 2022 17:41:01 +0800 Subject: [PATCH 137/608] Update exportfull.xml override --- .../harvard/iq/dataverse/export/ddi/exportfull.xml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml b/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml index 4314775c7a2..0570c832e4f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml @@ -166,10 +166,8 @@ RelatedMaterial1 RelatedMaterial2 - RelatedMaterial3 RelatedDatasets1 RelatedDatasets2 - RelatedDatasets3 @@ -187,19 +185,9 @@ RelatedPublicationCitation2 - - - - - RelatedPublicationIDNumber3 - - RelatedPublicationCitation3 - - OtherReferences1 OtherReferences2 - OtherReferences3 StudyLevelErrorNotes From 09c30a902ff776e721959a72d4d253e597b0730f Mon Sep 17 00:00:00 2001 From: cstr Date: Thu, 15 Sep 2022 17:46:15 +0800 Subject: [PATCH 138/608] Update dataset-all-defaults.txt override --- .../dataverse/export/dataset-all-defaults.txt | 28 +------------------ 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt index a2a6d9c0778..7348d54b7dd 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt +++ b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt @@ -16,7 +16,7 @@ "createTime": "2015-09-24T16:47:50Z", "license": { "name": "CC0 1.0", - "uri": "http://creativecommons.org/publicdomain/zero/1.0/" + "uri": "https://creativecommons.org/publicdomain/zero/1.0/" }, "metadataBlocks": { "citation": { @@ -362,32 +362,6 @@ "typeClass": "primitive", "value": "http://RelatedPublicationURL2.org" } - }, - { - "publicationCitation": { - "typeName": "publicationCitation", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationCitation3" - }, - "publicationIDType": { - "typeName": "publicationIDType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "CSTR" - }, - "publicationIDNumber": { - "typeName": "publicationIDNumber", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationIDNumber3" - }, - "publicationURL": { - "typeName": "publicationURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://RelatedPublicationURL3.org" - } } ] }, From af57e5dad41a303ec969b9ff9270a2a55ed83d4c Mon Sep 17 00:00:00 2001 From: cstr Date: Thu, 15 Sep 2022 18:12:31 +0800 Subject: [PATCH 139/608] Update ddi_dataset.xml override --- .../source/_static/api/ddi_dataset.xml | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml index 0c5d5857b54..1e86f911a46 100644 --- a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml +++ b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml @@ -164,10 +164,8 @@ RelatedMaterial1 RelatedMaterial2 - RelatedMaterial3 RelatedDatasets1 RelatedDatasets2 - RelatedDatasets3 @@ -185,19 +183,9 @@ RelatedPublicationCitation2 - - - - - RelatedPublicationIDNumber3 - - RelatedPublicationCitation3 - - - + OtherReferences1 OtherReferences2 - OtherReferences3 StudyLevelErrorNotes From 6b6ab875203964c113e2cec2bebf9bd1c61917bd Mon Sep 
17 00:00:00 2001 From: cstr Date: Thu, 15 Sep 2022 18:17:03 +0800 Subject: [PATCH 140/608] Update dataset-create-new-all-default-fields.json override --- ...dataset-create-new-all-default-fields.json | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json index e51f4d9e1b5..d7ae8cefbf7 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -369,32 +369,6 @@ "typeClass": "primitive", "value": "http://RelatedPublicationURL2.org" } - }, - { - "publicationCitation": { - "typeName": "publicationCitation", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationCitation3" - }, - "publicationIDType": { - "typeName": "publicationIDType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "CSTR" - }, - "publicationIDNumber": { - "typeName": "publicationIDNumber", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationIDNumber3" - }, - "publicationURL": { - "typeName": "publicationURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://RelatedPublicationURL3.org" - } } ] }, From f62474691479f4d1c246c72f53ceb7c2c650350f Mon Sep 17 00:00:00 2001 From: cstr Date: Thu, 15 Sep 2022 18:17:54 +0800 Subject: [PATCH 141/608] Update ddi_dataset.xml remove space --- doc/sphinx-guides/source/_static/api/ddi_dataset.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml index 1e86f911a46..05eaadc3458 100644 --- a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml +++ b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml @@ -183,7 +183,7 @@ RelatedPublicationCitation2 - + OtherReferences1 OtherReferences2 From f1b1e3680cf0a4767d1535bff8aaf508a791ae02 Mon Sep 17 00:00:00 2001 From: cstr Date: Thu, 15 Sep 2022 18:21:34 +0800 Subject: [PATCH 142/608] Create maven-publish.yml --- .github/workflows/maven-publish.yml | 54 +++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 .github/workflows/maven-publish.yml diff --git a/.github/workflows/maven-publish.yml b/.github/workflows/maven-publish.yml new file mode 100644 index 00000000000..05c017789db --- /dev/null +++ b/.github/workflows/maven-publish.yml @@ -0,0 +1,54 @@ +name: Maven Unit Tests + +on: + push: + paths: + - "**.java" + - "pom.xml" + - "modules/**/pom.xml" + pull_request: + paths: + - "**.java" + - "pom.xml" + - "modules/**/pom.xml" + +jobs: + unittest: + name: (${{ matrix.status}} / JDK ${{ matrix.jdk }}) Unit Tests + strategy: + fail-fast: false + matrix: + jdk: [ '11' ] + experimental: [false] + status: ["Stable"] + # + # JDK 17 builds disabled due to non-essential fails marking CI jobs as completely failed within + # Github Projects, PR lists etc. This was consensus on Slack #dv-tech. See issue #8094 + # (This is a limitation of how Github is currently handling these things.) 
+ # + #include: + # - jdk: '17' + # experimental: true + # status: "Experimental" + continue-on-error: ${{ matrix.experimental }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up JDK ${{ matrix.jdk }} + uses: actions/setup-java@v2 + with: + java-version: ${{ matrix.jdk }} + distribution: 'adopt' + - name: Cache Maven packages + uses: actions/cache@v2 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2 + - name: Build with Maven + run: mvn -DcompilerArgument=-Xlint:unchecked -Dtarget.java.version=${{ matrix.jdk }} -P all-unit-tests clean test + - name: Maven Code Coverage + env: + CI_NAME: github + COVERALLS_SECRET: ${{ secrets.GITHUB_TOKEN }} + run: mvn -V -B jacoco:report coveralls:report -DrepoToken=${COVERALLS_SECRET} -DpullRequest=${{ github.event.number }} From 8e7509d906cce8e61d3bb8d15f20b35e98d4f8f8 Mon Sep 17 00:00:00 2001 From: cstr Date: Thu, 15 Sep 2022 18:32:24 +0800 Subject: [PATCH 143/608] Delete maven-publish.yml --- .github/workflows/maven-publish.yml | 54 ----------------------------- 1 file changed, 54 deletions(-) delete mode 100644 .github/workflows/maven-publish.yml diff --git a/.github/workflows/maven-publish.yml b/.github/workflows/maven-publish.yml deleted file mode 100644 index 05c017789db..00000000000 --- a/.github/workflows/maven-publish.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: Maven Unit Tests - -on: - push: - paths: - - "**.java" - - "pom.xml" - - "modules/**/pom.xml" - pull_request: - paths: - - "**.java" - - "pom.xml" - - "modules/**/pom.xml" - -jobs: - unittest: - name: (${{ matrix.status}} / JDK ${{ matrix.jdk }}) Unit Tests - strategy: - fail-fast: false - matrix: - jdk: [ '11' ] - experimental: [false] - status: ["Stable"] - # - # JDK 17 builds disabled due to non-essential fails marking CI jobs as completely failed within - # Github Projects, PR lists etc. This was consensus on Slack #dv-tech. See issue #8094 - # (This is a limitation of how Github is currently handling these things.) 
- # - #include: - # - jdk: '17' - # experimental: true - # status: "Experimental" - continue-on-error: ${{ matrix.experimental }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up JDK ${{ matrix.jdk }} - uses: actions/setup-java@v2 - with: - java-version: ${{ matrix.jdk }} - distribution: 'adopt' - - name: Cache Maven packages - uses: actions/cache@v2 - with: - path: ~/.m2 - key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - restore-keys: ${{ runner.os }}-m2 - - name: Build with Maven - run: mvn -DcompilerArgument=-Xlint:unchecked -Dtarget.java.version=${{ matrix.jdk }} -P all-unit-tests clean test - - name: Maven Code Coverage - env: - CI_NAME: github - COVERALLS_SECRET: ${{ secrets.GITHUB_TOKEN }} - run: mvn -V -B jacoco:report coveralls:report -DrepoToken=${COVERALLS_SECRET} -DpullRequest=${{ github.event.number }}

From b903e2abf5e4527349ec177b063e36f1d216d999 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 15 Sep 2022 19:19:35 -0400 Subject: [PATCH 144/608] release note and addition to search doc --- doc/release-notes/8239-geospatial-indexing.md | 1 + doc/sphinx-guides/source/api/search.rst | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 doc/release-notes/8239-geospatial-indexing.md

diff --git a/doc/release-notes/8239-geospatial-indexing.md b/doc/release-notes/8239-geospatial-indexing.md new file mode 100644 index 00000000000..3e6ba0e7a07 --- /dev/null +++ b/doc/release-notes/8239-geospatial-indexing.md @@ -0,0 +1 @@ +Support for indexing the Geographic Bounding Box fields from the Geospatial metadata block has been added. This allows trusted applications with access to Solr to perform geospatial queries to find datasets, e.g. those near a given point. This is also a step towards enabling geospatial queries via the Dataverse API and UI. diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst index d5e56543fb1..149ad132f79 100755 --- a/doc/sphinx-guides/source/api/search.rst +++ b/doc/sphinx-guides/source/api/search.rst @@ -730,3 +730,17 @@ Output from iteration example CORS + + +Geospatial Indexing +------------------- + +Dataverse indexes the Geospatial Bounding Box field from the Geospatial metadata block as a solr.BBoxField enabling `Spatial Search`_. This capability is not yet exposed through the Dataverse API or UI but can be accessed by trusted applications with direct Solr access. +For example, a query of the form + +.. code-block:: none + + q=*:*&fq={!bbox sfield=solr_srpt}&pt=10,10&d=5 + + +would find datasets with information near the point latitude=10, longitude=10.

From a72e88ced9f09e0c8897e5707598252f2d54184c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 20 Jun 2022 17:46:47 +0200 Subject: [PATCH 145/608] refactor(settings): simplify SystemConfig.getVersion #7000

Instead of trying to read a build-time file from Maven, use MicroProfile Config to retrieve the version and build number. The version is by default set via microprofile-config.properties (or overridden by an env var in a container). The build number is still read from either BuildNumber.properties or, if not present, from MicroProfile Config, defaulting to empty. This also avoids copying extra files into containers to retrieve the version string. 
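A minimal sketch of the resulting lookup pattern, assuming only the MicroProfile Config API on the classpath; the two property names (dataverse.version, dataverse.build) are the options this change wires up, while the class name is illustrative:

    import org.eclipse.microprofile.config.Config;
    import org.eclipse.microprofile.config.ConfigProvider;

    public class VersionLookupExample {
        public static void main(String[] args) {
            Config config = ConfigProvider.getConfig();
            // dataverse.version is normally supplied via microprofile-config.properties;
            // dataverse.build stays empty unless set (env var, system property, ...).
            String version = config.getOptionalValue("dataverse.version", String.class).orElse("unknown");
            String build = config.getOptionalValue("dataverse.build", String.class).orElse("");
            System.out.println(build.isEmpty() ? version : version + " build " + build);
        }
    }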
--- .../iq/dataverse/util/SystemConfig.java | 175 ++++-------------- .../iq/dataverse/util/SystemConfigTest.java | 36 ++++ 2 files changed, 77 insertions(+), 134 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..25dd3dd6138 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -1,18 +1,28 @@ package edu.harvard.iq.dataverse.util; import com.ocpsoft.pretty.PrettyContext; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; +import org.passay.CharacterRule; + +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.inject.Named; +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonReader; +import javax.json.JsonString; +import javax.json.JsonValue; import java.io.StringReader; import java.net.InetAddress; import java.net.UnknownHostException; @@ -23,25 +33,11 @@ import java.util.List; import java.util.Map; import java.util.MissingResourceException; -import java.util.Properties; import java.util.ResourceBundle; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.json.Json; -import javax.json.JsonArray; -import javax.json.JsonObject; -import javax.json.JsonReader; -import javax.json.JsonString; -import javax.json.JsonValue; - -import org.passay.CharacterRule; -import org.apache.commons.io.IOUtils; - /** * System-wide configuration */ @@ -50,6 +46,7 @@ public class SystemConfig { private static final Logger logger = Logger.getLogger(SystemConfig.class.getCanonicalName()); + private static final Config config = ConfigProvider.getConfig(); @EJB SettingsServiceBean settingsService; @@ -109,9 +106,8 @@ public class SystemConfig { public static final long defaultZipDownloadLimit = 104857600L; // 100MB private static final int defaultMultipleUploadFilesLimit = 1000; private static final int defaultLoginSessionTimeout = 480; // = 8 hours - - private static String appVersionString = null; - private static String buildNumberString = null; + + private String buildNumber = null; private static final String JVM_TIMER_SERVER_OPTION = "dataverse.timerServer"; @@ -132,127 +128,38 @@ public String getVersion() { // candidate for being moved into some kind of an application-scoped caching // service... some CachingService @Singleton - ? (L.A. 
5.8) public String getVersion(boolean withBuildNumber) { - - if (appVersionString == null) { - - // The Version Number is no longer supplied in a .properties file - so - // we can't just do - // return BundleUtil.getStringFromBundle("version.number", null, ResourceBundle.getBundle("VersionNumber", Locale.US)); - // - // Instead, we'll rely on Maven placing the version number into the - // Manifest, and getting it from there: - // (this is considered a better practice, and will also allow us - // to maintain this number in only one place - the pom.xml file) - // -- L.A. 4.0.2 - - // One would assume, that once the version is in the MANIFEST.MF, - // as Implementation-Version:, it would be possible to obtain - // said version simply as - // appVersionString = getClass().getPackage().getImplementationVersion(); - // alas - that's not working, for whatever reason. (perhaps that's - // only how it works with jar-ed packages; not with .war files). - // People on the interwebs suggest that one should instead - // open the Manifest as a resource, then extract its attributes. - // There were some complications with that too. Plus, relying solely - // on the MANIFEST.MF would NOT work for those of the developers who - // are using "in place deployment" (i.e., where - // Netbeans runs their builds directly from the local target - // directory, bypassing the war file deployment; and the Manifest - // is only available in the .war file). For that reason, I am - // going to rely on the pom.properties file, and use java.util.Properties - // to read it. We have to look for this file in 2 different places - // depending on whether this is a .war file deployment, or a - // developers build. (the app-level META-INF is only populated when - // a .war file is built; the "maven-archiver" directory, on the other - // hand, is only available when it's a local build deployment). - // So, long story short, I'm resorting to the convoluted steps below. - // It may look hacky, but it should actually be pretty solid and - // reliable. - - - // First, find the absolute path url of the application persistence file - // always supplied with the Dataverse app: - java.net.URL fileUrl = Thread.currentThread().getContextClassLoader().getResource("META-INF/persistence.xml"); - String filePath = null; - - - if (fileUrl != null) { - filePath = fileUrl.getFile(); - if (filePath != null) { - InputStream mavenPropertiesInputStream = null; - String mavenPropertiesFilePath; - Properties mavenProperties = new Properties(); - - - filePath = filePath.replaceFirst("/[^/]*$", "/"); - // Using a relative path, find the location of the maven pom.properties file. - // First, try to look for it in the app-level META-INF. This will only be - // available if it's a war file deployment: - mavenPropertiesFilePath = filePath.concat("../../../META-INF/maven/edu.harvard.iq/dataverse/pom.properties"); - - try { - mavenPropertiesInputStream = new FileInputStream(mavenPropertiesFilePath); - } catch (IOException ioex) { - // OK, let's hope this is a local dev. build. 
- // In that case the properties file should be available in - // the maven-archiver directory: - - mavenPropertiesFilePath = filePath.concat("../../../../maven-archiver/pom.properties"); - - // try again: - - try { - mavenPropertiesInputStream = new FileInputStream(mavenPropertiesFilePath); - } catch (IOException ioex2) { - logger.warning("Failed to find and/or open for reading the pom.properties file."); - mavenPropertiesInputStream = null; - } - } - - if (mavenPropertiesInputStream != null) { - try { - mavenProperties.load(mavenPropertiesInputStream); - appVersionString = mavenProperties.getProperty("version"); - } catch (IOException ioex) { - logger.warning("caught IOException trying to read and parse the pom properties file."); - } finally { - IOUtils.closeQuietly(mavenPropertiesInputStream); - } - } - - } else { - logger.warning("Null file path representation of the location of persistence.xml in the webapp root directory!"); - } - } else { - logger.warning("Could not find the location of persistence.xml in the webapp root directory!"); - } - - - if (appVersionString == null) { - // still null? - defaulting to 4.0: - appVersionString = "4.0"; - } - } + // Retrieve the version via MPCONFIG + // NOTE: You may override the version via all methods of MPCONFIG. + // It will default to read from microprofile-config.properties source, + // which contains in the source a Maven property reference to ${project.version}. + // When packaging the app to deploy it, Maven will replace this, rendering it a static entry. + // NOTE: MicroProfile Config will cache the entry for us in internal maps. + String appVersion = JvmSettings.VERSION.lookup(); if (withBuildNumber) { - if (buildNumberString == null) { - // (build number is still in a .properties file in the source tree; it only - // contains a real build number if this war file was built by - // Jenkins) - + if (buildNumber == null) { + // (build number is still in a .properties file in the source tree; it only + // contains a real build number if this war file was built by Jenkins) + // TODO: might be replaced with same trick as for version via Maven property w/ empty default try { - buildNumberString = ResourceBundle.getBundle("BuildNumber").getString("build.number"); + buildNumber = ResourceBundle.getBundle("BuildNumber").getString("build.number"); } catch (MissingResourceException ex) { - buildNumberString = null; + buildNumber = null; + } + + // Also try to read the build number via MicroProfile Config if not already present from the + // properties file (so can be overridden by env var or other source) + if (buildNumber == null || buildNumber.isEmpty()) { + buildNumber = JvmSettings.BUILD.lookupOptional().orElse(""); } } - if (buildNumberString != null && !buildNumberString.equals("")) { - return appVersionString + " build " + buildNumberString; - } - } + if (!buildNumber.equals("")) { + return appVersion + " build " + buildNumber; + } + } - return appVersionString; + return appVersion; } public String getSolrHostColonPort() { diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index 891b029f521..b8ad0a57748 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -1,13 +1,49 @@ package edu.harvard.iq.dataverse.util; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.junit.jupiter.api.Test; import 
org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; class SystemConfigTest { + SystemConfig systemConfig = new SystemConfig(); + + @Test + void testGetVersion() { + // given + String version = "100.100"; + System.setProperty(JvmSettings.VERSION.getScopedKey(), version); + + // when + String result = systemConfig.getVersion(false); + + // then + assertEquals(version, result); + } + + @Test + void testGetVersionWithBuild() { + // given + String version = "100.100"; + String build = "FOOBAR"; + System.setProperty(JvmSettings.VERSION.getScopedKey(), version); + System.setProperty(JvmSettings.BUILD.getScopedKey(), build); + + // when + String result = systemConfig.getVersion(true); + + // then + assertTrue(result.startsWith(version), "'" + result + "' not starting with " + version); + assertTrue(result.contains("build")); + + // Cannot test this here - the bundle file might be present, which is not under test control + //assertTrue(result.endsWith(build), "'" + result + "' not ending with " + build); + } + @Test void testGetLongLimitFromStringOrDefault_withNullInput() { long defaultValue = 5L; From 5f925edf6668893c96df5117157086ef641a5b44 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 30 Jun 2022 21:57:49 +0200 Subject: [PATCH 146/608] docs(dev): add some tips about new options dataverse.build and dataverse.version #7000 --- doc/sphinx-guides/source/developers/tips.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/sphinx-guides/source/developers/tips.rst b/doc/sphinx-guides/source/developers/tips.rst index 3fff3e76ea8..2b15948bd34 100755 --- a/doc/sphinx-guides/source/developers/tips.rst +++ b/doc/sphinx-guides/source/developers/tips.rst @@ -173,6 +173,13 @@ commit id in your test deployment webpages on the bottom right corner next to th When you prefer manual updates, there is another script, see above: :ref:`custom_build_num_script`. +An alternative to that is using *MicroProfile Config* and setting the option ``dataverse.build`` via a system property, +environment variable (``DATAVERSE_BUILD``) or `one of the other config sources +`__. + +You could even override the version itself with the option ``dataverse.version`` in the same way, which is usually +picked up from a build-time source. + Sample Data ----------- From 32f7a6f59743dda59fa3ff3c779fc35e6239f1e1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 21 Jun 2022 20:57:01 +0200 Subject: [PATCH 147/608] refactor(settings): make Solr endpoint configurable via MPCONFIG #7000 By refactoring SystemConfig.getSolrHostColonPort, the Solr endpoint is not just configurable via a database setting, but also via all mechanisms of MicroProfile Config. - The database setting still has priority over the other mechanisms. - It's completely backward compatible, no config change necessary.
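To illustrate the tips above: both values can be pinned before deployment via the MicroProfile Config environment variables, a minimal sketch (the two values are made-up placeholders)::

    export DATAVERSE_VERSION=5.12-custom
    export DATAVERSE_BUILD=1234-abcd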
- Tests have been added to verify the behaviour - Default ("localhost:8983") for no setting given is now also done via MPCONFIG - Default for container usage ("solr:8983") possible via MPCONFIG profile "ct" --- .../iq/dataverse/settings/JvmSettings.java | 5 ++ .../iq/dataverse/util/SystemConfig.java | 37 +++++++----- .../META-INF/microprofile-config.properties | 6 ++ .../iq/dataverse/util/SystemConfigTest.java | 59 ++++++++++++++++++- 4 files changed, 92 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..e73453abc16 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -42,6 +42,11 @@ public enum JvmSettings { VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + // SOLR INDEX SETTINGS + SCOPE_SOLR(PREFIX, "solr"), + SOLR_HOST(SCOPE_SOLR, "host"), + SOLR_PORT(SCOPE_SOLR, "port"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..acdd112196f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; import java.io.FileInputStream; @@ -24,6 +25,7 @@ import java.util.Map; import java.util.MissingResourceException; import java.util.Properties; +import java.util.Optional; import java.util.ResourceBundle; import java.util.logging.Logger; import java.util.regex.Matcher; @@ -95,12 +97,6 @@ public class SystemConfig { */ private static final String PASSWORD_RESET_TIMEOUT_IN_MINUTES = "dataverse.auth.password-reset-timeout-in-minutes"; - /** - * A common place to find the String for a sane Solr hostname:port - * combination. - */ - private String saneDefaultForSolrHostColonPort = "localhost:8983"; - /** * The default number of datafiles that we allow to be created through * zip file upload. */ @@ -254,15 +250,28 @@ public String getVersion(boolean withBuildNumber) { return appVersionString; } - + + /** + * Retrieve the Solr endpoint in "host:port" form, to be used with a Solr client. + * + * This will retrieve the setting either from the database ({@link SettingsServiceBean.Key#SolrHostColonPort}) or + * via the MicroProfile Config API (properties {@link JvmSettings#SOLR_HOST} and {@link JvmSettings#SOLR_PORT}). + * + * A database setting always takes precedence. If not given via other config sources, a default from + * resources/META-INF/microprofile-config.properties is used. (It's possible to use profiles.)
+ * + * @return Solr endpoint as string "hostname:port" + */ public String getSolrHostColonPort() { - String SolrHost; - if ( System.getenv("SOLR_SERVICE_HOST") != null && System.getenv("SOLR_SERVICE_HOST") != ""){ - SolrHost = System.getenv("SOLR_SERVICE_HOST"); - } - else SolrHost = saneDefaultForSolrHostColonPort; - String solrHostColonPort = settingsService.getValueForKey(SettingsServiceBean.Key.SolrHostColonPort, SolrHost); - return solrHostColonPort; + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. + // NOTE: containers should use system property mp.config.profile=ct to use sane container usage default + String host = JvmSettings.SOLR_HOST.lookup(); + String port = JvmSettings.SOLR_PORT.lookup(); + + // DB setting takes precedence over all. If not present, will return default from above. + return Optional.ofNullable(settingsService.getValueForKey(SettingsServiceBean.Key.SolrHostColonPort)) + .orElse(host + ":" + port); } public boolean isProvCollectionEnabled() { diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..b6aa686de01 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,6 +3,12 @@ dataverse.version=${project.version} dataverse.build= +# SEARCH INDEX +dataverse.solr.host=localhost +# Activating mp config profile -Dmp.config.profile=ct changes default to "solr" as DNS name +%ct.dataverse.solr.host=solr +dataverse.solr.port=8983 + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index 891b029f521..75f919b90ab 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -1,13 +1,70 @@ package edu.harvard.iq.dataverse.util; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.doReturn; +@ExtendWith(MockitoExtension.class) class SystemConfigTest { - + + @InjectMocks + SystemConfig systemConfig = new SystemConfig(); + @Mock + SettingsServiceBean settingsService; + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + void testGetSolrHostColonPortNoDBEntry() { + // given + String hostPort = "foobar:1234"; + + // when + doReturn(null).when(settingsService).getValueForKey(SettingsServiceBean.Key.SolrHostColonPort); + String result = systemConfig.getSolrHostColonPort(); + + // then + assertEquals(hostPort, result); + } + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + void testGetSolrHostColonPortWithDBEntry() { + // given + String dbEntry = "hello:4321"; + + // when + 
doReturn(dbEntry).when(settingsService).getValueForKey(SettingsServiceBean.Key.SolrHostColonPort); + String result = systemConfig.getSolrHostColonPort(); + + // then + assertEquals(dbEntry, result); + } + + @Test + void testGetSolrHostColonPortDefault() { + // given + String hostPort = "localhost:8983"; + + // when + doReturn(null).when(settingsService).getValueForKey(SettingsServiceBean.Key.SolrHostColonPort); + String result = systemConfig.getSolrHostColonPort(); + + // then + assertEquals(hostPort, result); + } + @Test void testGetLongLimitFromStringOrDefault_withNullInput() { long defaultValue = 5L; From af36a0d4b6fb03502bb6dec65d0acfd60116d2c4 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 21 Jun 2022 20:59:40 +0200 Subject: [PATCH 148/608] feat(settings,solr): make Solr URL details configurable When using Dataverse with a non-default Solr, HTTPS, custom core name or similar, it's necessary to have a configurable URL for the Solr endpoint. This becomes now possible via MicroProfile Config, defaulting to the old variant. --- .../iq/dataverse/search/IndexServiceBean.java | 13 +++- .../dataverse/search/SolrClientService.java | 12 ++- .../iq/dataverse/settings/JvmSettings.java | 3 + .../META-INF/microprofile-config.properties | 3 + .../search/IndexServiceBeanTest.java | 73 +++++++++++++++---- .../search/SolrClientServiceTest.java | 59 +++++++++++++++ 6 files changed, 144 insertions(+), 19 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 484e5768eb1..06a6e5928df 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -30,6 +30,7 @@ import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; @@ -86,6 +87,8 @@ import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.sax.BodyContentHandler; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; import org.xml.sax.ContentHandler; @Stateless @@ -93,6 +96,7 @@ public class IndexServiceBean { private static final Logger logger = Logger.getLogger(IndexServiceBean.class.getCanonicalName()); + private static final Config config = ConfigProvider.getConfig(); @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; @@ -153,13 +157,18 @@ public class IndexServiceBean { public static final String HARVESTED = "Harvested"; private String rootDataverseName; private Dataverse rootDataverseCached; - private SolrClient solrServer; + SolrClient solrServer; private VariableMetadataUtil variableMetadataUtil; @PostConstruct public void init() { - String urlString = "http://" + systemConfig.getSolrHostColonPort() + "/solr/collection1"; + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. 
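+        // Hypothetical example: with protocol "https", getSolrHostColonPort() returning "solr.example.org:8983",
+        // and path "/solr/mycore", the urlString built below would be "https://solr.example.org:8983/solr/mycore".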
+ String protocol = JvmSettings.SOLR_PROT.lookup(); + String path = JvmSettings.SOLR_PATH.lookup(); + + String urlString = protocol + "://" + systemConfig.getSolrHostColonPort() + path; solrServer = new HttpSolrClient.Builder(urlString).build(); rootDataverseName = findRootDataverseCached().getName(); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java index f00ece9aacc..70483853979 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java @@ -5,6 +5,7 @@ */ package edu.harvard.iq.dataverse.search; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; import java.util.logging.Logger; @@ -15,6 +16,8 @@ import javax.inject.Named; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; /** * @@ -30,6 +33,7 @@ @Singleton public class SolrClientService { private static final Logger logger = Logger.getLogger(SolrClientService.class.getCanonicalName()); + private static final Config config = ConfigProvider.getConfig(); @EJB SystemConfig systemConfig; @@ -38,9 +42,13 @@ public class SolrClientService { @PostConstruct public void init() { - String urlString = "http://" + systemConfig.getSolrHostColonPort() + "/solr/collection1"; - solrClient = new HttpSolrClient.Builder(urlString).build(); + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. 
+ String protocol = JvmSettings.SOLR_PROT.lookup(); + String path = JvmSettings.SOLR_PATH.lookup(); + String urlString = protocol + "://" + systemConfig.getSolrHostColonPort() + path; + solrClient = new HttpSolrClient.Builder(urlString).build(); } @PreDestroy diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index e73453abc16..222346e3b35 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -46,6 +46,9 @@ public enum JvmSettings { SCOPE_SOLR(PREFIX, "solr"), SOLR_HOST(SCOPE_SOLR, "host"), SOLR_PORT(SCOPE_SOLR, "port"), + SOLR_PROT(SCOPE_SOLR, "protocol"), + SOLR_CORE(SCOPE_SOLR, "core"), + SOLR_PATH(SCOPE_SOLR, "path"), ; diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index b6aa686de01..c846d80220c 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -8,6 +8,9 @@ dataverse.solr.host=localhost # Activating mp config profile -Dmp.config.profile=ct changes default to "solr" as DNS name %ct.dataverse.solr.host=solr dataverse.solr.port=8983 +dataverse.solr.protocol=http +dataverse.solr.core=collection1 +dataverse.solr.path=/solr/${dataverse.solr.core} # DATABASE dataverse.db.host=localhost diff --git a/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java index ad4647e4898..aab6af660cb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java @@ -1,18 +1,5 @@ package edu.harvard.iq.dataverse.search; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Set; -import java.util.logging.Logger; -import java.util.stream.Collectors; - -import org.apache.solr.client.solrj.SolrServerException; -import org.junit.Before; -import org.junit.Test; -import org.mockito.Mockito; - import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; @@ -26,21 +13,47 @@ import edu.harvard.iq.dataverse.MetadataBlock; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +@ExtendWith(MockitoExtension.class) public class IndexServiceBeanTest { private static final Logger logger = 
Logger.getLogger(IndexServiceBeanTest.class.getCanonicalName()); private IndexServiceBean indexService; private Dataverse dataverse; - @Before + @Mock + private SettingsServiceBean settingsService; + @InjectMocks + private SystemConfig systemConfig = new SystemConfig(); + + @BeforeEach public void setUp() { dataverse = MocksFactory.makeDataverse(); dataverse.setDataverseType(DataverseType.UNCATEGORIZED); indexService = new IndexServiceBean(); - indexService.systemConfig = new SystemConfig(); + indexService.systemConfig = systemConfig; indexService.settingsService = Mockito.mock(SettingsServiceBean.class); indexService.dataverseService = Mockito.mock(DataverseServiceBean.class); indexService.datasetFieldService = Mockito.mock(DatasetFieldServiceBean.class); @@ -48,6 +61,36 @@ public void setUp() { Mockito.when(indexService.dataverseService.findRootDataverse()).thenReturn(dataverse); } + + @Test + public void testInitWithDefaults() { + // given + String url = "http://localhost:8983/solr/collection1"; + + // when + indexService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) indexService.solrServer; + assertEquals(url, client.getBaseURL()); + } + + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + @JvmSetting(key = JvmSettings.SOLR_CORE, value = "test") + void testInitWithConfig() { + // given + String url = "http://foobar:1234/solr/test"; + + // when + indexService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) indexService.solrServer; + assertEquals(url, client.getBaseURL()); + } @Test public void TestIndexing() throws SolrServerException, IOException { diff --git a/src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java b/src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java new file mode 100644 index 00000000000..a3b3c8a2080 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java @@ -0,0 +1,59 @@ +package edu.harvard.iq.dataverse.search; + +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +@ExtendWith(MockitoExtension.class) +class SolrClientServiceTest { + + @Mock + SettingsServiceBean settingsServiceBean; + @InjectMocks + SystemConfig systemConfig; + SolrClientService clientService = new SolrClientService(); + + @BeforeEach + void setUp() { + clientService.systemConfig = systemConfig; + } + + @Test + void testInitWithDefaults() { + // given + String url = "http://localhost:8983/solr/collection1"; + + // when + clientService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) clientService.getSolrClient(); + assertEquals(url, client.getBaseURL()); + } + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + @JvmSetting(key = JvmSettings.SOLR_CORE, value = "test") + void testInitWithConfig() { + // given + String url = "http://foobar:1234/solr/test"; + + // when + clientService.init(); + + 
// then + HttpSolrClient client = (HttpSolrClient) clientService.getSolrClient(); + assertEquals(url, client.getBaseURL()); + } +} \ No newline at end of file From 0727d85b6a2755e2eb754988ff20a55aae8c92fb Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 23 Jun 2022 18:13:27 +0200 Subject: [PATCH 149/608] docs(settings): mark :SolrHostColonPort with @Deprecated #7000 --- .../harvard/iq/dataverse/settings/SettingsServiceBean.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index c12b8f6e452..98dd6e2fa3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -174,7 +174,12 @@ public enum Key { * */ SearchRespectPermissionRoot, - /** Solr hostname and port, such as "localhost:8983". */ + /** + * Solr hostname and port, such as "localhost:8983". + * @deprecated New installations should not use this database setting, but use {@link JvmSettings#SOLR_HOST} + * and {@link JvmSettings#SOLR_PORT}. + */ + @Deprecated(forRemoval = true, since = "2022-07-01") SolrHostColonPort, /** Enable full-text indexing in solr up to max file size */ SolrFullTextIndexing, //true or false (default) From 6965bebe78b83987af069f4f6a13e6a50539884a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 21 Jun 2022 21:34:07 +0200 Subject: [PATCH 150/608] docs(settings): add Solr MPCONFIG options to guides #7000 Describe the new options to set the Solr endpoint, crosslinking the old way and adding hints about MPCONFIG profiles. --- .../source/installation/config.rst | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..c1cf39e4182 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1462,6 +1462,61 @@ Defaults to ``5432``, the default PostgreSQL port. Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_PORT``. +.. _dataverse.solr.host: + +dataverse.solr.host ++++++++++++++++++++ + +The hostname of a Solr server to connect to. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``localhost``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_HOST``. +Defaults to ``solr``, when used with ``mp.config.profile=ct`` (:ref:`see below <:ApplicationServerSettings>`). + +dataverse.solr.port ++++++++++++++++++++ + +The Solr server port to connect to. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``8983``, the default Solr port. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PORT``. + +dataverse.solr.core ++++++++++++++++++++ + +The name of the Solr core to use for this Dataverse installation. Might be used to switch to a different core quickly. +Remember to restart / redeploy Dataverse after changing the setting (as with :ref:`:SolrHostColonPort`). + +Defaults to ``collection1``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_CORE``. 
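For illustration, a deployment pointing at a remote Solr could combine the options above via the corresponding environment variables (host name and core name below are hypothetical placeholders)::

    export DATAVERSE_SOLR_HOST=solr.example.org
    export DATAVERSE_SOLR_PORT=8983
    export DATAVERSE_SOLR_CORE=mycore

Together with the protocol and path defaults described next, this would make Dataverse talk to ``http://solr.example.org:8983/solr/mycore``.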
+ +dataverse.solr.protocol ++++++++++++++++++++++++ + +The Solr server URL protocol for the connection. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``http``, but may be set to ``https`` for TLS-secured Solr installations. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PROTOCOL``. + +dataverse.solr.path ++++++++++++++++++++ + +The path part of the Solr endpoint URL (e.g. ``/solr/collection1`` in ``http://localhost:8983/solr/collection1``). +Might be used to target a Solr API at non-default places. Remember to restart / redeploy Dataverse after changing the +setting (as with :ref:`:SolrHostColonPort`). + +Defaults to ``/solr/${dataverse.solr.core}``, interpolating the core name. If you override the path, make sure to +include the variable so that your configured core name is still picked up! + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PATH``. + dataverse.rserve.host +++++++++++++++++++++ @@ -1673,6 +1728,21 @@ To facilitate large file upload and download, the Dataverse Software installer b and restart Payara to apply your change. +mp.config.profile ++++++++++++++++++ + +MicroProfile Config 2.0 defines the `concept of "profiles" `_. +They can be used to change configuration values by context. This is used in Dataverse to change some configuration +defaults when used inside a container context rather than in classic installations. + +As per the spec, you will need to set the configuration value ``mp.config.profile`` to ``ct`` as early as possible. +This is best done with a system property: + +``./asadmin create-system-properties 'mp.config.profile=ct'`` + +You might also create your own profiles and use these; please refer to the upstream documentation linked above. + + .. _database-settings: Database Settings @@ -2160,6 +2230,8 @@ Limit the number of files in a zip that your Dataverse installation will accept. ``curl -X PUT -d 2048 http://localhost:8080/api/admin/settings/:ZipUploadFilesLimit`` +.. _:SolrHostColonPort: + :SolrHostColonPort ++++++++++++++++++ @@ -2167,6 +2239,8 @@ By default your Dataverse installation will attempt to connect to Solr on port 8 ``curl -X PUT -d localhost:8983 http://localhost:8080/api/admin/settings/:SolrHostColonPort`` +**Note:** Instead of using a database setting, you could alternatively use JVM settings like :ref:`dataverse.solr.host`.
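The database setting still wins if both are present. To clear it and fall back to the JVM settings above, the settings API can be used again (a sketch, assuming the admin settings endpoint accepts ``DELETE`` the same way it accepts ``PUT``)::

    curl -X DELETE http://localhost:8080/api/admin/settings/:SolrHostColonPort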
+ :SolrFullTextIndexing +++++++++++++++++++++ From a7fe29c8e2e088fff71a13327e28c7cbb9595c15 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 16 Sep 2022 10:32:10 +0200 Subject: [PATCH 151/608] test(settings): make SystemConfigTest version testing use JvmSetting extension --- .../harvard/iq/dataverse/util/SystemConfigTest.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index b8ad0a57748..3bbe331a361 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.util; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; @@ -26,22 +27,18 @@ void testGetVersion() { } @Test + @JvmSetting(key = JvmSettings.VERSION, value = "100.100") + @JvmSetting(key = JvmSettings.BUILD, value = "FOOBAR") void testGetVersionWithBuild() { - // given - String version = "100.100"; - String build = "FOOBAR"; - System.setProperty(JvmSettings.VERSION.getScopedKey(), version); - System.setProperty(JvmSettings.BUILD.getScopedKey(), build); - // when String result = systemConfig.getVersion(true); // then - assertTrue(result.startsWith(version), "'" + result + "' not starting with " + version); + assertTrue(result.startsWith("100.100"), "'" + result + "' not starting with 100.100"); assertTrue(result.contains("build")); // Cannot test this here - there might be the bundle file present which is not under test control - //assertTrue(result.endsWith(build), "'" + result + "' not ending with " + build); + //assertTrue(result.endsWith("FOOBAR"), "'" + result + "' not ending with FOOBAR"); } @Test From 3fb596e1f97fc074e8dc2056728cb5ace97989e1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 27 Jun 2022 14:19:35 +0200 Subject: [PATCH 152/608] refactor(settings): replace dataverse.fqdn and siteUrl lookups via MPCONFIG #7000 - Add both settings to JvmSettings to enable lookup - Refactor SystemConfig.getDataverseSiteUrlStatic to use MPCONFIG, but keep current behaviour of constructing the URL from FQDN or DNS reverse lookup. (Out of scope here, see #6636) - Replace clones of the method in Xrecord, DdiExportUtil, HandlenetServiceBean with direct usages of the static method to avoid unnecessary duplicated code. - Refactor SchemaDotOrgExporterTest with @JvmSetting for site url. - Remove unused constants from SystemConfig - Added default for container usage within "ct" profile, so we avoid extra lookups/settings for development usage. 
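In effect, the precedence this commit message describes can be condensed into a short sketch (a simplified illustration, not the exact code; the real implementation is in the SystemConfig diff below)::

    // Simplified sketch of the lookup order; error handling abbreviated.
    String siteUrl = JvmSettings.SITE_URL.lookupOptional()                  // 1. dataverse.siteUrl wins as-is
            .orElseGet(() -> "https://" + JvmSettings.FQDN.lookupOptional() // 2. else https:// + dataverse.fqdn
                    .orElseGet(() -> {
                        try {                                               // 3. else https:// + reverse DNS name
                            return java.net.InetAddress.getLocalHost().getCanonicalHostName();
                        } catch (java.net.UnknownHostException e) {
                            return "localhost"; // placeholder; the real method returns null instead
                        }
                    }));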
See also https://github.com/IQSS/dataverse/issues/6636 --- .../iq/dataverse/HandlenetServiceBean.java | 19 +--- .../edu/harvard/iq/dataverse/api/Info.java | 3 +- .../dataverse/export/ddi/DdiExportUtil.java | 38 +------ .../harvest/server/xoai/Xrecord.java | 25 +---- .../iq/dataverse/settings/JvmSettings.java | 2 + .../iq/dataverse/util/SystemConfig.java | 98 +++++++++---------- .../META-INF/microprofile-config.properties | 5 + .../export/SchemaDotOrgExporterTest.java | 6 +- 8 files changed, 69 insertions(+), 127 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java index 1a8ee8a85e8..df16991b51e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java @@ -24,8 +24,6 @@ import java.io.File; import java.io.FileInputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; @@ -34,6 +32,7 @@ import java.security.PrivateKey; /* Handlenet imports: */ +import edu.harvard.iq.dataverse.util.SystemConfig; import net.handle.hdllib.AbstractMessage; import net.handle.hdllib.AbstractResponse; import net.handle.hdllib.AdminRecord; @@ -247,21 +246,7 @@ private String getRegistrationUrl(DvObject dvObject) { } public String getSiteUrl() { - logger.log(Level.FINE,"getSiteUrl"); - String hostUrl = System.getProperty("dataverse.siteUrl"); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty("dataverse.fqdn"); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - hostUrl = "https://" + hostName; - return hostUrl; + return SystemConfig.getDataverseSiteUrlStatic(); } private byte[] readKey(final String file) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Info.java b/src/main/java/edu/harvard/iq/dataverse/api/Info.java index 4fe5cba5b9f..fd7824c15cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Info.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Info.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; import javax.ejb.EJB; @@ -44,7 +45,7 @@ public Response getInfo() { @GET @Path("server") public Response getServer() { - return response( req -> ok(systemConfig.getDataverseServer())); + return response( req -> ok(JvmSettings.FQDN.lookup())); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 4bbcd653ac3..eb7632dd03c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -32,18 +32,15 @@ import edu.harvard.iq.dataverse.export.DDIExporter; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import static edu.harvard.iq.dataverse.util.SystemConfig.FQDN; -import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonUtil; import 
edu.harvard.iq.dataverse.util.xml.XmlPrinter; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -1292,7 +1289,7 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da // harvesting *all* files are encoded as otherMats; even tabular ones. private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos) throws XMLStreamException { // The preferred URL for this dataverse, for cooking up the file access API links: - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileDTO fileDTo : fileDtos) { // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat, @@ -1339,7 +1336,7 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, List fileMetadatas) throws XMLStreamException { // The preferred URL for this dataverse, for cooking up the file access API links: - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileMetadata fileMetadata : fileMetadatas) { // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat, @@ -1555,33 +1552,6 @@ private static void saveJsonToDisk(String datasetVersionAsJson) throws IOExcepti Files.write(Paths.get("/tmp/out.json"), datasetVersionAsJson.getBytes()); } - /** - * The "official", designated URL of the site; - * can be defined as a complete URL; or derived from the - * "official" hostname. 
If none of these options is set, - * defaults to the InetAddress.getLocalHOst() and https; - */ - private static String getDataverseSiteUrl() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - hostName = null; - } - } - - if (hostName != null) { - return "https://" + hostName; - } - - return "http://localhost:8080"; - } - @@ -1893,7 +1863,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, DataVariable dv, FileMeta } private static void createFileDscr(XMLStreamWriter xmlw, DatasetVersion datasetVersion) throws XMLStreamException { - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { DataFile dataFile = fileMetadata.getDataFile(); diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java index 7e115c78f06..4485b798658 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java @@ -8,14 +8,12 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.export.ExportException; import edu.harvard.iq.dataverse.export.ExportService; -import static edu.harvard.iq.dataverse.util.SystemConfig.FQDN; -import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; + +import edu.harvard.iq.dataverse.util.SystemConfig; import org.apache.poi.util.ReplacingInputStream; /** @@ -149,7 +147,7 @@ private void writeMetadataStream(InputStream inputStream, OutputStream outputStr private String customMetadataExtensionRef(String identifier) { String ret = "<" + METADATA_FIELD + " directApiCall=\"" - + getDataverseSiteUrl() + + SystemConfig.getDataverseSiteUrlStatic() + DATAVERSE_EXTENDED_METADATA_API + "?exporter=" + DATAVERSE_EXTENDED_METADATA_FORMAT @@ -164,21 +162,4 @@ private String customMetadataExtensionRef(String identifier) { private boolean isExtendedDataverseMetadataMode(String formatName) { return DATAVERSE_EXTENDED_METADATA_FORMAT.equals(formatName); } - - private String getDataverseSiteUrl() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - hostUrl = "https://" + hostName; - return hostUrl; - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..8d2832980cc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -41,6 +41,8 @@ public enum JvmSettings { // GENERAL SETTINGS VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + FQDN(PREFIX, "fqdn"), + SITE_URL(PREFIX, "siteUrl"), ; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..635f47c5800 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; import java.io.FileInputStream; @@ -23,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.MissingResourceException; +import java.util.Optional; import java.util.Properties; import java.util.ResourceBundle; import java.util.logging.Logger; @@ -61,23 +63,7 @@ public class SystemConfig { AuthenticationServiceBean authenticationService; public static final String DATAVERSE_PATH = "/dataverse/"; - - /** - * A JVM option for the advertised fully qualified domain name (hostname) of - * the Dataverse installation, such as "dataverse.example.com", which may - * differ from the hostname that the server knows itself as. - * - * The equivalent in DVN 3.x was "dvn.inetAddress". - */ - public static final String FQDN = "dataverse.fqdn"; - - /** - * A JVM option for specifying the "official" URL of the site. - * Unlike the FQDN option above, this would be a complete URL, - * with the protocol, port number etc. - */ - public static final String SITE_URL = "dataverse.siteUrl"; - + /** * A JVM option for where files are stored on the file system. */ @@ -340,32 +326,58 @@ public static int getMinutesUntilPasswordResetTokenExpires() { } /** - * The "official", designated URL of the site; - * can be defined as a complete URL; or derived from the - * "official" hostname. If none of these options is set, - * defaults to the InetAddress.getLocalHOst() and https; - * These are legacy JVM options. Will be eventualy replaced - * by the Settings Service configuration. + * Lookup (or construct) the designated URL of this instance from configuration. + * + * Can be defined as a complete URL via dataverse.siteUrl; or derived from the hostname + * dataverse.fqdn and HTTPS. If none of these options is set, defaults to the + * {@link InetAddress#getLocalHost} and HTTPS. + * + * NOTE: This method does not provide any validation. + * TODO: The behaviour of this method is subject to a later change, see + * https://github.com/IQSS/dataverse/issues/6636 + * + * @return The designated URL of this instance as per configuration. */ public String getDataverseSiteUrl() { return getDataverseSiteUrlStatic(); } + /** + * Lookup (or construct) the designated URL of this instance from configuration. + * + * Can be defined as a complete URL via dataverse.siteUrl; or derived from the hostname + * dataverse.fqdn and HTTPS. If none of these options is set, defaults to the + * {@link InetAddress#getLocalHost} and HTTPS. + * + * NOTE: This method does not provide any validation. + * TODO: The behaviour of this method is subject to a later change, see + * https://github.com/IQSS/dataverse/issues/6636 + * + * @return The designated URL of this instance as per configuration. 
+ */ public static String getDataverseSiteUrlStatic() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; + // If dataverse.siteUrl has been configured, simply return it + Optional<String> siteUrl = JvmSettings.SITE_URL.lookupOptional(); + if (siteUrl.isPresent()) { + return siteUrl.get(); } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } + + // Otherwise try to look up the dataverse.fqdn setting and default to HTTPS + Optional<String> fqdn = JvmSettings.FQDN.lookupOptional(); + if (fqdn.isPresent()) { + return "https://" + fqdn.get(); + } + + // Last resort - get the server's local name and use it. + // BEWARE - this is dangerous. + // 1) A server might have a different name than your repository URL. + // 2) The underlying reverse DNS lookup might point to a different name than your repository URL. + // 3) If this server has multiple IPs assigned, which one will it be for the lookup? + try { + return "https://" + InetAddress.getLocalHost().getCanonicalHostName(); + } catch (UnknownHostException e) { + return null; } - hostUrl = "https://" + hostName; - return hostUrl; } /** @@ -375,22 +387,6 @@ public String getPageURLWithQueryString() { return PrettyContext.getCurrentInstance().getRequestURL().toURL() + PrettyContext.getCurrentInstance().getRequestQueryString().toQueryString(); } - /** - * The "official" server's fully-qualified domain name: - */ - public String getDataverseServer() { - // still reliese on a JVM option: - String fqdn = System.getProperty(FQDN); - if (fqdn == null) { - try { - fqdn = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - return fqdn; - } - public String getGuidesBaseUrl() { String saneDefault = "https://guides.dataverse.org"; String guidesBaseUrl = settingsService.getValueForKey(SettingsServiceBean.Key.GuidesBaseUrl, saneDefault); diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..a9ee8236c7e 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,6 +3,11 @@ dataverse.version=${project.version} dataverse.build= +# Default only for containers!
(keep mimicking the current behaviour - +# changing that is part of https://github.com/IQSS/dataverse/issues/6636) +%ct.dataverse.fqdn=localhost +%ct.dataverse.siteUrl=http://${dataverse.fqdn}:8080 + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..7119dfaf834 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -6,9 +6,9 @@ import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.mocks.MockDatasetFieldSvc; -import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import static edu.harvard.iq.dataverse.util.SystemConfig.FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.json.JsonParser; import edu.harvard.iq.dataverse.util.json.JsonUtil; @@ -31,6 +31,8 @@ import javax.json.Json; import javax.json.JsonObject; import javax.json.JsonReader; + +import edu.harvard.iq.dataverse.util.testing.JvmSetting; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; @@ -64,6 +66,7 @@ public static void tearDownClass() { * Test of exportDataset method, of class SchemaDotOrgExporter. */ @Test + @JvmSetting(key = JvmSettings.SITE_URL, value = "https://librascholar.org") public void testExportDataset() throws Exception { File datasetVersionJson = new File("src/test/resources/json/dataset-finch2.json"); String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); @@ -92,7 +95,6 @@ public void testExportDataset() throws Exception { Dataverse dataverse = new Dataverse(); dataverse.setName("LibraScholar"); dataset.setOwner(dataverse); - System.setProperty(SITE_URL, "https://librascholar.org"); boolean hideFileUrls = false; if (hideFileUrls) { System.setProperty(FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS, "true"); From 3f19c121dee708831d48c83a4fc819986379e819 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 27 Jun 2022 15:05:57 +0200 Subject: [PATCH 153/608] docs(settings): update fqdn and siteUrl desc - Notes about MPCONFIG usage. - Rewording to make it more clear how this shall be used. --- .../source/installation/config.rst | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..65912e77245 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1376,30 +1376,52 @@ When changing values these values with ``asadmin``, you'll need to delete the ol It's also possible to change these values by stopping Payara, editing ``payara5/glassfish/domains/domain1/config/domain.xml``, and restarting Payara. +.. _dataverse.fqdn: + dataverse.fqdn ++++++++++++++ -If the Dataverse installation has multiple DNS names, this option specifies the one to be used as the "official" host name. For example, you may want to have dataverse.example.edu, and not the less appealing server-123.socsci.example.edu to appear exclusively in all the registered global identifiers, Data Deposit API records, etc. 
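A concrete pairing of the two options documented below, using the MicroProfile Config environment variables and a placeholder host name, could look like this::

    export DATAVERSE_FQDN=dataverse.example.edu
    export DATAVERSE_SITEURL=https://dataverse.example.edu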
+The URL to access your Dataverse installation gets used in multiple places: + +- Email confirmation links +- Password reset links +- Generating a Private URL +- PID minting +- Exporting to Schema.org format (and showing JSON-LD in the dataset page's HTML) +- Exporting to DDI format +- Which Dataverse installation an "external tool" should return to +- URLs embedded in SWORD API responses +- ... -The password reset feature requires ``dataverse.fqdn`` to be configured. +Usually it will follow the pattern ``https://<fqdn>/``. +The FQDN part of your Dataverse installation URL can be determined by setting ``dataverse.fqdn``. -.. note:: +**Notes:** - Do note that whenever the system needs to form a service URL, by default, it will be formed with ``https://`` and port 443. I.e., - ``https://{dataverse.fqdn}/`` - If that does not suit your setup, you can define an additional option, ``dataverse.siteUrl``, explained below. +- The URL will default to using ``https://`` and no additional port information. If that does not suit your setup, you + can define an additional option, ``dataverse.siteUrl``, :ref:`explained below <dataverse.siteUrl>`, which always + takes precedence. +- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FQDN``. + Defaults to ``localhost`` when used with ``mp.config.profile=ct`` .. _dataverse.siteUrl: dataverse.siteUrl +++++++++++++++++ -.. note:: +Some environments may require using a different URL pattern to access your installation. You might need to use +HTTP without the "S", a non-standard port, and so on. This is especially useful in development or testing environments. + +You can provide a custom-tailored site URL via ``dataverse.siteUrl``, which always takes precedence. +Example: ``dataverse.siteUrl=http://localhost:8080`` + +**Notes:** + +- This setting may be used in combination with variable replacement, referencing :ref:`dataverse.fqdn` with + ``./asadmin create-jvm-options "\-Ddataverse.siteUrl=http\://\${dataverse.fqdn}\:8080"`` +- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SITEURL``. + Defaults to ``http://${dataverse.fqdn}:8080`` when used with ``mp.config.profile=ct`` - and specify the protocol and port number you would prefer to be used to advertise the URL for your Dataverse installation.
- For example, configured in domain.xml: - ``-Ddataverse.fqdn=dataverse.example.edu`` - ``-Ddataverse.siteUrl=http://${dataverse.fqdn}:8080`` dataverse.files.directory +++++++++++++++++++++++++ From f7e7e4aed8e2e089ac7ce55bb583795230d6849e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 22 Jun 2022 18:22:08 +0200 Subject: [PATCH 154/608] refactor(settings): replace lookups of dataverse.files.directory with MPCONFIG #7000 - Adding dataverse.files.directory equivalent to JvmSettings - Remove all System.getProperty("dataverse.files.directory") or similar - Add default with /tmp/dataverse via microprofile-config.properties as formerly seen in FileUtil and Dataset only - Refactor SwordConfigurationImpl to reuse the NoSuchElementException thrown by MPCONFIG - Refactor GoogleCloudSubmitToArchiveCommand to use the JvmSettings.lookup and create the file stream in a try-with-resources block --- .../edu/harvard/iq/dataverse/Dataset.java | 9 ++-- .../iq/dataverse/EditDatafilesPage.java | 7 ++- .../datadeposit/SwordConfigurationImpl.java | 52 +++++++++---------- .../filesystem/FileRecordJobListener.java | 7 ++- .../importer/filesystem/FileRecordReader.java | 9 ++-- .../GoogleCloudSubmitToArchiveCommand.java | 31 +++++------ .../impl/ImportFromFileSystemCommand.java | 48 +++++++++-------- .../iq/dataverse/settings/JvmSettings.java | 4 ++ .../harvard/iq/dataverse/util/FileUtil.java | 8 ++- .../iq/dataverse/util/SystemConfig.java | 5 -- .../META-INF/microprofile-config.properties | 3 ++ 11 files changed, 94 insertions(+), 89 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index a4f82d41bac..e2f00d0b54b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -33,6 +33,8 @@ import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; + +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -528,11 +530,8 @@ private Collection<String> getCategoryNames() { @Deprecated public Path getFileSystemDirectory() { Path studyDir = null; - - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); if (this.getAlternativePersistentIndentifiers() != null && !this.getAlternativePersistentIndentifiers().isEmpty()) { for (AlternativePersistentIdentifier api : this.getAlternativePersistentIndentifiers()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index f53e2377a69..a895c90dabe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -31,6 +31,7 @@ import edu.harvard.iq.dataverse.ingest.IngestUtil; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -2425,10 +2426,8 @@ public boolean isTemporaryPreviewAvailable(String fileSystemId, String mimeType) return false; } - String
filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.isEmpty()) { - filesRootDirectory = "/tmp/files"; - } + // Retrieve via MPCONFIG. Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String fileSystemName = filesRootDirectory + "/temp/" + fileSystemId; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java index ce5f9415fcc..1e506c6a0b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api.datadeposit; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; import java.util.Arrays; @@ -86,37 +87,32 @@ public boolean storeAndCheckBinary() { @Override public String getTempDirectory() { - String tmpFileDir = System.getProperty(SystemConfig.FILES_DIRECTORY); - if (tmpFileDir != null) { - String swordDirString = tmpFileDir + File.separator + "sword"; - File swordDirFile = new File(swordDirString); - /** - * @todo Do we really need this check? It seems like we do because - * if you create a dataset via the native API and then later try to - * upload a file via SWORD, the directory defined by - * dataverse.files.directory may not exist and we get errors deep in - * the SWORD library code. Could maybe use a try catch in the doPost - * method of our SWORDv2MediaResourceServlet. - */ - if (swordDirFile.exists()) { + // will throw a runtime exception when not found + String tmpFileDir = JvmSettings.FILES_DIRECTORY.lookup(); + + String swordDirString = tmpFileDir + File.separator + "sword"; + File swordDirFile = new File(swordDirString); + /** + * @todo Do we really need this check? It seems like we do because + * if you create a dataset via the native API and then later try to + * upload a file via SWORD, the directory defined by + * dataverse.files.directory may not exist and we get errors deep in + * the SWORD library code. Could maybe use a try catch in the doPost + * method of our SWORDv2MediaResourceServlet. + */ + if (swordDirFile.exists()) { + return swordDirString; + } else { + boolean mkdirSuccess = swordDirFile.mkdirs(); + if (mkdirSuccess) { + logger.info("Created directory " + swordDirString); return swordDirString; } else { - boolean mkdirSuccess = swordDirFile.mkdirs(); - if (mkdirSuccess) { - logger.info("Created directory " + swordDirString); - return swordDirString; - } else { - String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); - logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); - } + String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); + logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); + // sadly, must throw RunTimeException to communicate with SWORD user + throw new RuntimeException(msgForSwordUsers); } - } else { - String msgForSwordUsers = ("JVM option \"" + SystemConfig.FILES_DIRECTORY + "\" not defined. 
Check logs for details."); - logger.severe(msgForSwordUsers); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index 6b82a665c17..ecb998c66af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -57,6 +57,7 @@ import javax.inject.Named; import javax.servlet.http.HttpServletRequest; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; import java.io.FileReader; @@ -433,8 +434,10 @@ private void loadChecksumManifest() { manifest = checksumManifest; getJobLogger().log(Level.INFO, "Checksum manifest = " + manifest + " (FileSystemImportJob.xml property)"); } - // construct full path - String manifestAbsolutePath = System.getProperty("dataverse.files.directory") + + // Construct full path - retrieve base dir via MPCONFIG. + // (Has sane default /tmp/dataverse from META-INF/microprofile-config.properties) + String manifestAbsolutePath = JvmSettings.FILES_DIRECTORY.lookup() + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index b3d3a7107a6..e3b67e9b0d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -24,6 +24,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.filefilter.NotFileFilter; import org.apache.commons.io.filefilter.WildcardFileFilter; @@ -96,9 +97,11 @@ public void init() { @Override public void open(Serializable checkpoint) throws Exception { - - directory = new File(System.getProperty("dataverse.files.directory") - + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); + + // Retrieve via MPCONFIG. 
Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String baseDir = JvmSettings.FILES_DIRECTORY.lookup(); + + directory = new File(baseDir + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); // TODO: // The above goes directly to the filesystem directory configured by the // old "dataverse.files.directory" JVM option (otherwise used for temp diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 5d017173685..da2701a41e7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -1,16 +1,27 @@ package edu.harvard.iq.dataverse.engine.command.impl; +import com.google.auth.oauth2.ServiceAccountCredentials; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageException; +import com.google.cloud.storage.StorageOptions; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; +import org.apache.commons.codec.binary.Hex; +import javax.json.Json; +import javax.json.JsonObjectBuilder; +import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.PipedInputStream; @@ -21,17 +32,6 @@ import java.util.Map; import java.util.logging.Logger; -import javax.json.Json; -import javax.json.JsonObjectBuilder; - -import org.apache.commons.codec.binary.Hex; -import com.google.auth.oauth2.ServiceAccountCredentials; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Bucket; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.StorageException; -import com.google.cloud.storage.StorageOptions; - @RequiredPermissions(Permission.PublishDataset) public class GoogleCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { @@ -56,10 +56,11 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); - try { - FileInputStream fis = new FileInputStream(System.getProperty("dataverse.files.directory") + System.getProperty("file.separator") + "googlecloudkey.json"); + String cloudKeyFile = JvmSettings.FILES_DIRECTORY.lookup() + File.separator + "googlecloudkey.json"; + + try (FileInputStream cloudKeyStream = new FileInputStream(cloudKeyFile)) { storage = StorageOptions.newBuilder() - .setCredentials(ServiceAccountCredentials.fromStream(fis)) + .setCredentials(ServiceAccountCredentials.fromStream(cloudKeyStream)) .setProjectId(projectName) 
.build() .getService(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java index 64beba82450..5f31ea756eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java @@ -12,17 +12,20 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import java.io.File; -import java.util.Properties; -import java.util.logging.Level; -import java.util.logging.Logger; +import edu.harvard.iq.dataverse.settings.JvmSettings; + import javax.batch.operations.JobOperator; import javax.batch.operations.JobSecurityException; import javax.batch.operations.JobStartException; import javax.batch.runtime.BatchRuntime; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; +import java.io.File; +import java.util.Properties; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @RequiredPermissions(Permission.EditDataset) public class ImportFromFileSystemCommand extends AbstractCommand { @@ -69,18 +72,20 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { logger.info(error); throw new IllegalCommandException(error, this); } - File directory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier()); - // TODO: - // The above goes directly to the filesystem directory configured by the - // old "dataverse.files.directory" JVM option (otherwise used for temp - // files only, after the Multistore implementation (#6488). - // We probably want package files to be able to use specific stores instead. - // More importantly perhaps, the approach above does not take into account - // if the dataset may have an AlternativePersistentIdentifier, that may be - // designated isStorageLocationDesignator() - i.e., if a different identifer - // needs to be used to name the storage directory, instead of the main/current - // persistent identifier above. + + File directory = new File( + String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier())); + + // TODO: The above goes directly to the filesystem directory configured by the + // old "dataverse.files.directory" JVM option (otherwise used for temp + // files only, after the Multistore implementation (#6488). + // We probably want package files to be able to use specific stores instead. + // More importantly perhaps, the approach above does not take into account + // if the dataset may have an AlternativePersistentIdentifier, that may be + // designated isStorageLocationDesignator() - i.e., if a different identifer + // needs to be used to name the storage directory, instead of the main/current + // persistent identifier above. if (!isValidDirectory(directory)) { String error = "Dataset directory is invalid. 
" + directory; logger.info(error); @@ -93,11 +98,10 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { throw new IllegalCommandException(error, this); } - File uploadDirectory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier() - + File.separator + uploadFolder); - // TODO: - // see the comment above. + File uploadDirectory = new File(String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier(), uploadFolder)); + + // TODO: see the comment above. if (!isValidDirectory(uploadDirectory)) { String error = "Upload folder is not a valid directory."; logger.info(error); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..12e5e311278 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -42,6 +42,10 @@ public enum JvmSettings { VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + // FILES SETTINGS + SCOPE_FILES(PREFIX, "files"), + FILES_DIRECTORY(SCOPE_FILES, "directory"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 893c62b3cb0..a2c55d41613 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -40,6 +40,7 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.ingest.IngestableDataChecker; import edu.harvard.iq.dataverse.license.License; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.file.BagItFileHandler; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory; @@ -1389,11 +1390,8 @@ public static boolean canIngestAsTabular(String mimeType) { } public static String getFilesTempDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } - + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String filesTempDirectory = filesRootDirectory + "/temp"; if (!Files.exists(Paths.get(filesTempDirectory))) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..e9313e70218 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -78,11 +78,6 @@ public class SystemConfig { */ public static final String SITE_URL = "dataverse.siteUrl"; - /** - * A JVM option for where files are stored on the file system. - */ - public static final String FILES_DIRECTORY = "dataverse.files.directory"; - /** * Some installations may not want download URLs to their files to be * available in Schema.org JSON-LD output. 
diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..ab219071767 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,6 +3,9 @@ dataverse.version=${project.version} dataverse.build= +# FILES +dataverse.files.directory=/tmp/dataverse + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432

From 5c2c7022ad9f11234b0e33ddaf3a0aa2696ab154 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Wed, 22 Jun 2022 22:27:30 +0200
Subject: [PATCH 155/608] docs(settings): provide more detail for dataverse.files.directory

--- doc/sphinx-guides/source/api/native-api.rst | 2 ++ doc/sphinx-guides/source/installation/config.rst | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 339a291bf4d..6dd1bbab728 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -552,6 +552,8 @@ You should expect an HTTP 200 ("OK") response and JSON indicating the database I .. note:: Only a Dataverse installation account with superuser permissions is allowed to include files when creating a dataset via this API. Adding files this way only adds their file metadata to the database, you will need to manually add the physical files to the file system.

+.. _api-import-dataset:
+
Import a Dataset into a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..89329ea3821 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -274,6 +274,8 @@ If you wish to change which store is used by default, you'll need to delete the It is also possible to set maximum file upload size limits per store. See the :ref:`:MaxFileUploadSizeInBytes` setting below.

+.. _storage-files-dir:
+
File Storage ++++++++++++

@@ -1404,7 +1406,19 @@ dataverse.siteUrl dataverse.files.directory +++++++++++++++++++++++++

-This is how you configure the path Dataverse uses for temporary files. (File store specific dataverse.files.\<id\>.directory options set the permanent data storage locations.)
+Please provide an absolute path to a directory backed by some mounted file system. This directory is used for a number
+of purposes:
+
+1. ``<dataverse.files.directory>/temp`` after uploading, data is temporarily stored here for ingest and/or before
+   shipping to the final storage destination.
+2. ``<dataverse.files.directory>/sword`` a place to store uploads via the :doc:`../api/sword` before transfer
+   to final storage location and/or ingest.
+3. ``<dataverse.files.directory>/<authority>/<identifier>`` data location for file system imports, see
+   :ref:`api-import-dataset`.
+4. ``<dataverse.files.directory>/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports.
+
+This directory might also be used for permanent storage of data, but this setting is independent from
+:ref:`storage-files-dir` configuration.
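(Taken together, the patch derives every location above from the single base setting. A condensed sketch of the path logic as it appears across the classes touched here; the wrapper class and the example authority/identifier values are illustrative only:)

import edu.harvard.iq.dataverse.settings.JvmSettings;
import java.io.File;

public class FilesDirectoryLayoutSketch {
    public static void main(String[] args) {
        String base = JvmSettings.FILES_DIRECTORY.lookup();
        // FileUtil.getFilesTempDirectory()
        String temp = base + "/temp";
        // SwordConfigurationImpl.getTempDirectory()
        String sword = base + File.separator + "sword";
        // ImportFromFileSystemCommand, per dataset ("10.5072"/"FK2ABC" are made up)
        String importDir = String.join(File.separator, base, "10.5072", "FK2ABC");
        // GoogleCloudSubmitToArchiveCommand
        String googleKey = base + File.separator + "googlecloudkey.json";
        System.out.printf("%s%n%s%n%s%n%s%n", temp, sword, importDir, googleKey);
    }
}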
dataverse.auth.password-reset-timeout-in-minutes ++++++++++++++++++++++++++++++++++++++++++++++++ From d7ab9f6e5359356db3b01ab9e6f87347cf117fe7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 27 Jun 2022 15:11:01 +0200 Subject: [PATCH 156/608] style: replace system prop 'file.separator' with File.separator --- .../batch/jobs/importer/filesystem/FileRecordJobListener.java | 3 ++- .../batch/jobs/importer/filesystem/FileRecordReader.java | 2 +- .../java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index ecb998c66af..7837474fc27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -60,6 +60,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; +import java.io.File; import java.io.FileReader; import java.io.IOException; import java.sql.Timestamp; @@ -80,7 +81,7 @@ @Dependent public class FileRecordJobListener implements ItemReadListener, StepListener, JobListener { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; private static final UserNotification.Type notifyType = UserNotification.Type.FILESYSTEMIMPORT; diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index e3b67e9b0d2..a4f8ffd2378 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -55,7 +55,7 @@ @Dependent public class FileRecordReader extends AbstractItemReader { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; @Inject JobContext jobContext; diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java index 4a778dc7abb..a2f76ca953d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java @@ -154,8 +154,8 @@ public static Logger getJobLogger(String jobId) { try { Logger jobLogger = Logger.getLogger("job-"+jobId); FileHandler fh; - String logDir = System.getProperty("com.sun.aas.instanceRoot") + System.getProperty("file.separator") - + "logs" + System.getProperty("file.separator") + "batch-jobs" + System.getProperty("file.separator"); + String logDir = System.getProperty("com.sun.aas.instanceRoot") + File.separator + + "logs" + File.separator + "batch-jobs" + File.separator; checkCreateLogDirectory( logDir ); fh = new FileHandler(logDir + "job-" + jobId + ".log"); logger.log(Level.INFO, "JOB LOG: " + logDir + "job-" + jobId + ".log"); From c231fb05e933e8a04b8ca9abdee19e723abc4336 Mon Sep 17 00:00:00 2001 From: cstr Date: Mon, 19 Sep 2022 08:45:02 +0800 Subject: [PATCH 157/608] Update OpenAireExportUtilTest.java override --- .../edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java | 2 -- 1 file changed, 2 deletions(-) diff 
--git a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java index 40664527cfc..7f7cc203506 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java @@ -609,8 +609,6 @@ public void testWriteRelatedIdentifierElement() throws XMLStreamException, IOExc + "RelatedPublicationIDNumber1" + "" + "RelatedPublicationIDNumber2" - + "" - + "RelatedPublicationIDNumber3" + "", stringWriter.toString()); } From 1d47db41e4c7e2c92fe17eb61dff140808042b25 Mon Sep 17 00:00:00 2001 From: cstr Date: Mon, 19 Sep 2022 08:46:33 +0800 Subject: [PATCH 158/608] Update citation.properties --- src/main/java/propertyFiles/citation.properties | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index edb418b0416..b382f8a5a1e 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -251,7 +251,7 @@ controlledvocabulary.subject.social_sciences=Social Sciences controlledvocabulary.subject.other=Other controlledvocabulary.publicationIDType.ark=ark controlledvocabulary.publicationIDType.arxiv=arXiv -controlledvocabulary.publicationIDType.CSTR=CSTR +controlledvocabulary.publicationIDType.cstr=CSTR controlledvocabulary.publicationIDType.bibcode=bibcode controlledvocabulary.publicationIDType.doi=doi controlledvocabulary.publicationIDType.ean13=ean13 @@ -346,7 +346,7 @@ controlledvocabulary.language.galician=Galician controlledvocabulary.language.georgian=Georgian controlledvocabulary.language.german=German controlledvocabulary.language.greek_(modern)=Greek (modern) -controlledvocabulary.language.guarani=Guaraní +controlledvocabulary.language.guarani=Guaraní controlledvocabulary.language.gujarati=Gujarati controlledvocabulary.language.haitian,_haitian_creole=Haitian, Haitian Creole controlledvocabulary.language.hausa=Hausa @@ -406,7 +406,7 @@ controlledvocabulary.language.navajo,_navaho=Navajo, Navaho controlledvocabulary.language.northern_ndebele=Northern Ndebele controlledvocabulary.language.nepali=Nepali controlledvocabulary.language.ndonga=Ndonga -controlledvocabulary.language.norwegian_bokmal=Norwegian Bokmål +controlledvocabulary.language.norwegian_bokmal=Norwegian BokmÃ¥l controlledvocabulary.language.norwegian_nynorsk=Norwegian Nynorsk controlledvocabulary.language.norwegian=Norwegian controlledvocabulary.language.nuosu=Nuosu @@ -468,7 +468,7 @@ controlledvocabulary.language.urdu=Urdu controlledvocabulary.language.uzbek=Uzbek controlledvocabulary.language.venda=Venda controlledvocabulary.language.vietnamese=Vietnamese -controlledvocabulary.language.volapuk=Volapük +controlledvocabulary.language.volapuk=Volapük controlledvocabulary.language.walloon=Walloon controlledvocabulary.language.welsh=Welsh controlledvocabulary.language.wolof=Wolof @@ -478,4 +478,4 @@ controlledvocabulary.language.yiddish=Yiddish controlledvocabulary.language.yoruba=Yoruba controlledvocabulary.language.zhuang,_chuang=Zhuang, Chuang controlledvocabulary.language.zulu=Zulu -controlledvocabulary.language.not_applicable=Not applicable \ No newline at end of file +controlledvocabulary.language.not_applicable=Not applicable From 8093406b5a700582a11cf7d2b016c564757ec479 Mon Sep 17 00:00:00 2001 From: cstr Date: Mon, 19 Sep 2022 08:48:22 +0800 Subject: [PATCH 159/608] Update 
dataset-all-defaults.txt

--- .../edu/harvard/iq/dataverse/export/dataset-all-defaults.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt index 7348d54b7dd..a3f0dffc767 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt +++ b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt @@ -16,7 +16,7 @@ "createTime": "2015-09-24T16:47:50Z", "license": { "name": "CC0 1.0", - "uri": "https://creativecommons.org/publicdomain/zero/1.0/" + "uri": "http://creativecommons.org/publicdomain/zero/1.0/" }, "metadataBlocks": { "citation": {

From 744030bd1c07d7ee62ff1d9aecd60f7b256dd57b Mon Sep 17 00:00:00 2001
From: cstr
Date: Mon, 19 Sep 2022 09:49:22 +0800
Subject: [PATCH 160/608] Update SchemaDotOrgExporterTest.java

--- .../iq/dataverse/export/SchemaDotOrgExporterTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index d21d24a5432..644848d2776 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -406,10 +406,10 @@ private static void mockDatasetFieldSvc() { new ControlledVocabularyValue(1l, "ark", publicationIdTypes), new ControlledVocabularyValue(2l, "arXiv", publicationIdTypes), new ControlledVocabularyValue(3l, "bibcode", publicationIdTypes), - new ControlledVocabularyValue(4l, "doi", publicationIdTypes), - new ControlledVocabularyValue(5l, "ean13", publicationIdTypes), - new ControlledVocabularyValue(6l, "handle", publicationIdTypes), - new ControlledVocabularyValue(17l, "cstr", publicationIdTypes) + new ControlledVocabularyValue(4l, "cstr", publicationIdTypes), + new ControlledVocabularyValue(5l, "doi", publicationIdTypes), + new ControlledVocabularyValue(6l, "ean13", publicationIdTypes), + new ControlledVocabularyValue(7l, "handle", publicationIdTypes) // Etc. There are more. )); publicationChildTypes.add(datasetFieldTypeSvc.add(publicationIdTypes));

From 2af2d7c6106d890cb7d01872ed66b99143929385 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 19 Sep 2022 14:52:28 +0200
Subject: [PATCH 161/608] fix(test): make UrlTokenUtilTest not assume site url

For unknown reasons, the test assumed the site url / fqdn to be "https://librascholar.org", which might have been coming from some test ordering side effect. Now the test sets the site URL setting itself, to have control over the generated data.

On a related note, this meant upgrading the test from JUnit 4 to JUnit 5, plus some minor code cleanups.
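(The @JvmSetting annotation used in the rewritten test below is presumably backed by a JUnit 5 extension that swaps the system property around each test and restores it afterwards, so the change cannot leak into other tests. A sketch of that idea; the class name and wiring here are assumptions, not the actual helper from this patch:)

import org.junit.jupiter.api.extension.AfterEachCallback;
import org.junit.jupiter.api.extension.BeforeEachCallback;
import org.junit.jupiter.api.extension.ExtensionContext;

public class JvmSettingSketchExtension implements BeforeEachCallback, AfterEachCallback {
    private String oldValue;

    @Override
    public void beforeEach(ExtensionContext context) {
        // In the real helper, key and value come from the @JvmSetting
        // annotation; hardcoded here to keep the sketch short.
        oldValue = System.setProperty("dataverse.siteUrl", "https://foobar");
    }

    @Override
    public void afterEach(ExtensionContext context) {
        // Restore the previous state so no other test observes the change.
        if (oldValue == null) {
            System.clearProperty("dataverse.siteUrl");
        } else {
            System.setProperty("dataverse.siteUrl", oldValue);
        }
    }
}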
--- .../iq/dataverse/util/UrlTokenUtilTest.java | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java index ffc6b813045..782890627e1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java @@ -6,24 +6,25 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import static org.junit.Assert.assertEquals; -import static org.mockito.Mockito.when; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.junit.jupiter.api.Test; import java.util.ArrayList; import java.util.List; -import org.junit.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; +import static org.junit.jupiter.api.Assertions.assertEquals; public class UrlTokenUtilTest { @Test + @JvmSetting(key = JvmSettings.SITE_URL, value = "https://foobar") public void testGetToolUrlWithOptionalQueryParameters() { - + // given + String siteUrl = "https://foobar"; + DataFile dataFile = new DataFile(); - dataFile.setId(42l); + dataFile.setId(42L); FileMetadata fmd = new FileMetadata(); DatasetVersion dv = new DatasetVersion(); Dataset ds = new Dataset(); @@ -31,20 +32,26 @@ public void testGetToolUrlWithOptionalQueryParameters() { ds.setGlobalId(new GlobalId("doi:10.5072/FK2ABCDEF")); dv.setDataset(ds); fmd.setDatasetVersion(dv); - List fmdl = new ArrayList(); + List fmdl = new ArrayList<>(); fmdl.add(fmd); dataFile.setFileMetadatas(fmdl); + ApiToken apiToken = new ApiToken(); apiToken.setTokenString("7196b5ce-f200-4286-8809-03ffdbc255d7"); + + // when & then 1/2 URLTokenUtil urlTokenUtil = new URLTokenUtil(dataFile, apiToken, fmd, "en"); assertEquals("en", urlTokenUtil.replaceTokensWithValues("{localeCode}")); assertEquals("42 test en", urlTokenUtil.replaceTokensWithValues("{fileId} test {localeCode}")); assertEquals("42 test en", urlTokenUtil.replaceTokensWithValues("{fileId} test {localeCode}")); - - assertEquals("https://librascholar.org/api/files/42/metadata?key=" + apiToken.getTokenString(), urlTokenUtil.replaceTokensWithValues("{siteUrl}/api/files/{fileId}/metadata?key={apiToken}")); - + assertEquals( siteUrl + "/api/files/42/metadata?key=" + apiToken.getTokenString(), + urlTokenUtil.replaceTokensWithValues("{siteUrl}/api/files/{fileId}/metadata?key={apiToken}")); + + // when & then 2/2 URLTokenUtil urlTokenUtil2 = new URLTokenUtil(ds, apiToken, "en"); - assertEquals("https://librascholar.org/api/datasets/50?key=" + apiToken.getTokenString(), urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/{datasetId}?key={apiToken}")); - assertEquals("https://librascholar.org/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2ABCDEF&key=" + apiToken.getTokenString(), urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/:persistentId/?persistentId={datasetPid}&key={apiToken}")); + assertEquals(siteUrl + "/api/datasets/50?key=" + apiToken.getTokenString(), + urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/{datasetId}?key={apiToken}")); + assertEquals(siteUrl + "/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2ABCDEF&key=" + apiToken.getTokenString(), + 
urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/:persistentId/?persistentId={datasetPid}&key={apiToken}")); } }

From cbc7f8af47e2dccfaa6d55e9f78c07166c2d3b5e Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 4 Jul 2022 18:54:46 +0200
Subject: [PATCH 162/608] feat(settings): add rserve properties via MPCONFIG #7000

--- .../edu/harvard/iq/dataverse/settings/JvmSettings.java | 8 ++++++++ .../resources/META-INF/microprofile-config.properties | 7 +++++++ 2 files changed, 15 insertions(+)

diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..6c5131219ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -42,6 +42,14 @@ public enum JvmSettings { VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + // RSERVE CONNECTION + SCOPE_RSERVE(PREFIX, "rserve"), + RSERVE_HOST(SCOPE_RSERVE, "host"), + RSERVE_PORT(SCOPE_RSERVE, "port", "dataverse.ingest.rserve.port"), + RSERVE_USER(SCOPE_RSERVE, "user"), + RSERVE_PASSWORD(SCOPE_RSERVE, "password"), + RSERVE_TEMPDIR(SCOPE_RSERVE, "tempdir"), + ; private static final String SCOPE_SEPARATOR = ".";

diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..c7b907edb6c 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -8,3 +8,10 @@ dataverse.db.host=localhost dataverse.db.port=5432 dataverse.db.user=dataverse dataverse.db.name=dataverse + +# RSERVE +dataverse.rserve.host=localhost +dataverse.rserve.port=6311 +dataverse.rserve.user=rserve +dataverse.rserve.password=rserve +dataverse.rserve.tempdir=/tmp

From 6732b4bc578ad0b2f410dbed6d482e377c86fde9 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 4 Jul 2022 18:54:57 +0200
Subject: [PATCH 163/608] refactor(rserve): introduce MPCONFIG settings retrieval #7000

1. Instead of reading the configuration from system properties only, switch to using the MPCONFIG and JvmSettings fluent API.
2. Instead of saving the configuration in a static variable, retrieve the config in the constructor. This has 2 advantages: 1) no worries about execution order and MPCONFIG not yet being ready, 2) readers pick up new config settings when they change (no need to restart).
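(The two points of the commit message, condensed into code; both classes here are illustrative, the real changes follow in the diff:)

import edu.harvard.iq.dataverse.settings.JvmSettings;

class StaticLookupStyle {
    // Resolved once at class-load time: fails if MPCONFIG is not ready yet
    // and never notices later changes to any config source.
    static final String RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup();
}

class ConstructorLookupStyle {
    private final String rserveHost;

    ConstructorLookupStyle() {
        // Resolved per instance: MPCONFIG is guaranteed to be up, and the
        // next reader/service instance picks up a changed setting.
        this.rserveHost = JvmSettings.RSERVE_HOST.lookup();
    }
}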
--- .../impl/plugins/rdata/RDATAFileReader.java | 35 ++++------ .../rserve/RemoteDataFrameService.java | 68 ++++++------------- 2 files changed, 33 insertions(+), 70 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java index c2899b29d1f..1ec0c389049 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java @@ -31,6 +31,7 @@ import javax.inject.Inject; // Rosuda Wrappers and Methods for R-calls to Rserve +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.rosuda.REngine.REXP; import org.rosuda.REngine.REXPMismatchException; import org.rosuda.REngine.RList; @@ -88,10 +89,10 @@ public class RDATAFileReader extends TabularDataFileReader { static private String RSCRIPT_WRITE_DVN_TABLE = ""; // RServe static variables - private static String RSERVE_HOST = System.getProperty("dataverse.rserve.host"); - private static String RSERVE_USER = System.getProperty("dataverse.rserve.user"); - private static String RSERVE_PASSWORD = System.getProperty("dataverse.rserve.password"); - private static int RSERVE_PORT; + private final String RSERVE_HOST; + private final int RSERVE_PORT; + private final String RSERVE_USER; + private final String RSERVE_PASSWORD; // TODO: // we're not using these time/data formats for anything, are we? @@ -138,24 +139,6 @@ public class RDATAFileReader extends TabularDataFileReader { * This is primarily to construct the R-Script */ static { - /* - * Set defaults fallbacks for class properties - */ - if (RSERVE_HOST == null) - RSERVE_HOST = "localhost"; - - if (RSERVE_USER == null) - RSERVE_USER = "rserve"; - - if (RSERVE_PASSWORD == null) - RSERVE_PASSWORD = "rserve"; - - if (System.getProperty("dataverse.ingest.rserve.port") == null) - RSERVE_PORT = 6311; - else - RSERVE_PORT = Integer.parseInt(System.getProperty("dataverse.rserve.port")); - - // Load R Scripts into memory, so that we can run them via R-serve RSCRIPT_WRITE_DVN_TABLE = readLocalResource("scripts/write.table.R"); RSCRIPT_GET_DATASET = readLocalResource("scripts/get.dataset.R"); @@ -451,7 +434,13 @@ public RDATAFileReader(TabularDataFileReaderSpi originator) { super(originator); - + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin. Every time a file would be read with this file reader, + // a new reader will be created, reading from the cached config source settings with minimal overhead. 
+ this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup(); + this.RSERVE_PORT = JvmSettings.RSERVE_PORT.lookup(Integer.class); + this.RSERVE_USER = JvmSettings.RSERVE_USER.lookup(); + this.RSERVE_PASSWORD = JvmSettings.RSERVE_PASSWORD.lookup(); LOG.fine("RDATAFileReader: INSIDE RDATAFileReader"); diff --git a/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java b/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java index f13b6f11434..df2e44ecb27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java +++ b/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java @@ -41,6 +41,7 @@ import java.util.Set; import java.util.logging.Logger; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.RandomStringUtils; @@ -72,57 +73,33 @@ public class RemoteDataFrameService { private static String TMP_TABDATA_FILE_EXT = ".tab"; private static String TMP_RDATA_FILE_EXT = ".RData"; - - private static String RSERVE_HOST = null; - private static String RSERVE_USER = null; - private static String RSERVE_PWD = null; - private static int RSERVE_PORT = -1; + + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin + private final String RSERVE_HOST; + private final String RSERVE_USER; + private final String RSERVE_PWD; + private final int RSERVE_PORT; + private final String RSERVE_TMP_DIR; private static String DATAVERSE_R_FUNCTIONS = "scripts/dataverse_r_functions.R"; private static String DATAVERSE_R_PREPROCESSING = "scripts/preprocess.R"; - - public static String LOCAL_TEMP_DIR = System.getProperty("java.io.tmpdir"); - public static String RSERVE_TMP_DIR=null; public String PID = null; public String tempFileNameIn = null; public String tempFileNameOut = null; - - static { - - RSERVE_TMP_DIR = System.getProperty("dataverse.rserve.tempdir"); - - if (RSERVE_TMP_DIR == null){ - RSERVE_TMP_DIR = "/tmp/"; - } - - RSERVE_HOST = System.getProperty("dataverse.rserve.host"); - if (RSERVE_HOST == null){ - RSERVE_HOST= "localhost"; - } - - RSERVE_USER = System.getProperty("dataverse.rserve.user"); - if (RSERVE_USER == null){ - RSERVE_USER= "rserve"; - } - - RSERVE_PWD = System.getProperty("dataverse.rserve.password"); - if (RSERVE_PWD == null){ - RSERVE_PWD= "rserve"; - } - - - if (System.getProperty("dataverse.rserve.port") == null ){ - RSERVE_PORT= 6311; - } else { - RSERVE_PORT = Integer.parseInt(System.getProperty("dataverse.rserve.port")); - } - - } - - public RemoteDataFrameService() { + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin. Config sources have their own caches, so adding + // these here means the setting can be changed dynamically without too much overhead. 
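// (As in the file reader above, lookup() fails fast with MPCONFIG's NoSuchElementException when no config source provides a value, so the defaults shipped in microprofile-config.properties are what keep this constructor safe.)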
+ this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup(); + this.RSERVE_USER = JvmSettings.RSERVE_USER.lookup(); + this.RSERVE_PWD = JvmSettings.RSERVE_PASSWORD.lookup(); + this.RSERVE_PORT = JvmSettings.RSERVE_PORT.lookup(Integer.class); + this.RSERVE_TMP_DIR = JvmSettings.RSERVE_TEMPDIR.lookup(); + + // initialization PID = RandomStringUtils.randomNumeric(6); @@ -703,15 +680,12 @@ public Map runDataFrameRequest(RJobRequest jobRequest, RConnecti public File transferRemoteFile(RConnection connection, String targetFilename, String tmpFilePrefix, String tmpFileExt, int fileSize) { - // set up a local temp file: - + // set up a local temp file: File tmpResultFile = null; - String resultFile = tmpFilePrefix + PID + "." + tmpFileExt; - RFileInputStream rInStream = null; OutputStream outbr = null; try { - tmpResultFile = new File(LOCAL_TEMP_DIR, resultFile); + tmpResultFile = File.createTempFile(tmpFilePrefix + PID, "."+tmpFileExt); outbr = new BufferedOutputStream(new FileOutputStream(tmpResultFile)); // open the input stream rInStream = connection.openFile(targetFilename);

From d951f99bfc12440766add7f13cc1afb84f557448 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 4 Jul 2022 19:05:21 +0200
Subject: [PATCH 164/608] fix(settings): align Rserve tempdir default to docs #7000

The docs said the default is "/tmp/Rserve", while the code had "/tmp". Changing the code default to the documented one.

--- src/main/resources/META-INF/microprofile-config.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index c7b907edb6c..8d2793eadbf 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -14,4 +14,4 @@ dataverse.rserve.host=localhost dataverse.rserve.port=6311 dataverse.rserve.user=rserve dataverse.rserve.password=rserve -dataverse.rserve.tempdir=/tmp +dataverse.rserve.tempdir=/tmp/Rserve

From 507ae82a0b0674cce8d23f77a196894194396ea9 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Mon, 4 Jul 2022 19:06:53 +0200
Subject: [PATCH 165/608] docs(settings): add Rserve MPCONFIG to guide #7000

--- .../source/installation/config.rst | 43 ++++++++++++++++--- 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..ae0d2cb0b26 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1465,27 +1465,60 @@ Can also be set via *MicroProfile Config API* sources, e.g. the environment vari dataverse.rserve.host +++++++++++++++++++++

-Host name for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames).
+Host name for Rserve, used for tasks that require use of R (to ingest RData
+files and to save tabular data as RData frames).
+
+Defaults to ``localhost``.
+
+Can also be set via *MicroProfile Config API* sources, e.g. the environment
+variable ``DATAVERSE_RSERVE_HOST``.

dataverse.rserve.port +++++++++++++++++++++

-Port number for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames).
+Port number for Rserve, used for tasks that require use of R (to ingest RData
+files and to save tabular data as RData frames).
+
+Defaults to ``6311``.
+
+Can also be set via *MicroProfile Config API* sources, e.g. the environment
+variable ``DATAVERSE_RSERVE_PORT``.

dataverse.rserve.user +++++++++++++++++++++

-Username for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames).
+Username for Rserve, used for tasks that require use of R (to ingest RData
+files and to save tabular data as RData frames).
+
+Defaults to ``rserve``.
+
+Can also be set via *MicroProfile Config API* sources, e.g. the environment
+variable ``DATAVERSE_RSERVE_USER``.

dataverse.rserve.password +++++++++++++++++++++++++

-Password for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames).
+Password for Rserve, used for tasks that require use of R (to ingest RData
+files and to save tabular data as RData frames).
+
+Defaults to ``rserve``.
+
+Can also be set via *MicroProfile Config API* sources, e.g. the environment
+variable ``DATAVERSE_RSERVE_PASSWORD``.

dataverse.rserve.tempdir ++++++++++++++++++++++++

-Temporary directory used by Rserve (defaults to /tmp/Rserv). Note that this location is local to the host on which Rserv is running (specified in ``dataverse.rserve.host`` above). When talking to Rserve, Dataverse needs to know this location in order to generate absolute path names of the files on the other end.
+Temporary directory used by Rserve (defaults to ``/tmp/Rserve``). Note that this
+location is local to the host on which Rserve is running (specified in
+``dataverse.rserve.host`` above). When talking to Rserve, Dataverse needs to
+know this location in order to generate absolute path names of the files on the
+other end.
+
+Defaults to ``/tmp/Rserve``.
+
+Can also be set via *MicroProfile Config API* sources, e.g. the environment
+variable ``DATAVERSE_RSERVE_TEMPDIR``.

.. _dataverse.dropbox.key:

From c21082167d31c12354cab32544f5d7efeb100255 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Mon, 19 Sep 2022 13:48:20 -0400
Subject: [PATCH 166/608] add space

--- doc/sphinx-guides/source/api/search.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst index 149ad132f79..fdebfdb8b10 100755 --- a/doc/sphinx-guides/source/api/search.rst +++ b/doc/sphinx-guides/source/api/search.rst @@ -735,7 +735,7 @@ Output from iteration example Geospatial Indexing -------------------

-Dataverse indexes the Geospatial Bounding Box field from the Geospatial metadatablock as a solr.BBoxField enabling `Spatial Search<https://solr.apache.org/guide/8_11/spatial-search.html>`_. This capability is not yet exposed through the Dataverse API or UI but can be accessed by trusted applications with direct solr access.
+Dataverse indexes the Geospatial Bounding Box field from the Geospatial metadatablock as a solr.BBoxField enabling `Spatial Search <https://solr.apache.org/guide/8_11/spatial-search.html>`_. This capability is not yet exposed through the Dataverse API or UI but can be accessed by trusted applications with direct solr access.

For example, a query of the form ..
code-block:: none

From 139f0f7d5c762533e4fdcec5dc65e487170e570f Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Tue, 8 Mar 2022 17:27:51 +0100
Subject: [PATCH 167/608] feat(ct-base): add new base container image in submodule

--- modules/container-base/pom.xml | 90 ++++++++ .../container-base/src/main/docker/Dockerfile | 204 ++++++++++++++++++ .../src/main/docker/assembly.xml | 17 ++ .../src/main/docker/scripts/entrypoint.sh | 17 ++ .../init_1_generate_deploy_commands.sh | 65 ++++++ .../main/docker/scripts/startInForeground.sh | 89 ++++++++ modules/dataverse-parent/pom.xml | 51 +++++ 7 files changed, 533 insertions(+) create mode 100644 modules/container-base/pom.xml create mode 100644 modules/container-base/src/main/docker/Dockerfile create mode 100644 modules/container-base/src/main/docker/assembly.xml create mode 100644 modules/container-base/src/main/docker/scripts/entrypoint.sh create mode 100644 modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh create mode 100644 modules/container-base/src/main/docker/scripts/startInForeground.sh

diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml new file mode 100644 index 00000000000..8cb7e1ac795 --- /dev/null +++ b/modules/container-base/pom.xml @@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>edu.harvard.iq</groupId>
+        <artifactId>dataverse-parent</artifactId>
+        <version>${revision}</version>
+        <relativePath>../dataverse-parent</relativePath>
+    </parent>
+
+    <groupId>io.gdcc</groupId>
+    <artifactId>container-base</artifactId>
+    <packaging>${packaging.type}</packaging>
+    <name>Container Base Image</name>
+    <description>This module provides an application server base image to be decorated with the Dataverse app.</description>
+
+    <properties>
+        <packaging.type>pom</packaging.type>
+    </properties>
+
+    <profiles>
+        <profile>
+            <id>ct</id>
+            <properties>
+                <packaging.type>docker-build</packaging.type>
+            </properties>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-dependency-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>unpack</id>
+                                <phase>initialize</phase>
+                                <goals>
+                                    <goal>unpack</goal>
+                                </goals>
+                                <configuration>
+                                    <artifactItems>
+                                        <artifactItem>
+                                            <groupId>fish.payara.distributions</groupId>
+                                            <artifactId>payara</artifactId>
+                                            <version>${payara.version}</version>
+                                            <type>zip</type>
+                                            <overWrite>false</overWrite>
+                                            <outputDirectory>${project.build.directory}</outputDirectory>
+                                        </artifactItem>
+                                    </artifactItems>
+                                </configuration>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <groupId>io.fabric8</groupId>
+                        <artifactId>docker-maven-plugin</artifactId>
+                        <extensions>true</extensions>
+                        <configuration>
+                            <images>
+                                <image>
+                                    <alias>base</alias>
+                                    <name>%g/base:jdk${target.java.version}</name>
+                                    <registry>${ct.registry}</registry>
+                                    <build>
+                                        <dockerFile>Dockerfile</dockerFile>
+                                        <args>
+                                            <BASE_IMAGE>openjdk:${target.java.version}-jre</BASE_IMAGE>
+                                        </args>
+                                        <filter>@</filter>
+                                        <assembly>
+                                            <descriptor>assembly.xml</descriptor>
+                                        </assembly>
+                                    </build>
+                                </image>
+                            </images>
+                        </configuration>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
+</project>
\ No newline at end of file

diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile new file mode 100644 index 00000000000..635fbd89142 --- /dev/null +++ b/modules/container-base/src/main/docker/Dockerfile @@ -0,0 +1,204 @@
+# Copyright 2019 Forschungszentrum Jülich GmbH
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+################################################################################################################
+#
+# THIS FILE IS TO BE USED WITH MAVEN DOCKER BUILD:
+# mvn -Pct clean package docker:build
+#
+################################################################################################################
+#
+# Some commands used are inspired by https://github.com/payara/Payara/tree/master/appserver/extras/docker-images.
+# Most parts originate from older versions of https://github.com/gdcc/dataverse-kubernetes.
+# +# We are not using upstream Payara images because: +# - Using same base image as Solr (https://hub.docker.com/_/solr) is reducing pulls +# - Their image is less optimised for production usage by design choices +# + +# Make the Java base image and version configurable (useful for trying newer Java versions and flavors) +ARG BASE_IMAGE="openjdk:11-jre" +FROM $BASE_IMAGE + +# Default payara ports to expose +# 4848: admin console +# 9009: debug port (JPDA) +# 8080: http +# 8181: https +EXPOSE 4848 9009 8080 8181 + +ENV HOME_DIR="/opt/payara" +ENV PAYARA_DIR="${HOME_DIR}/appserver" \ + SCRIPT_DIR="${HOME_DIR}/scripts" \ + CONFIG_DIR="${HOME_DIR}/config" \ + DEPLOY_DIR="${HOME_DIR}/deployments" \ + DOCROOT_DIR="/docroot" \ + SECRETS_DIR="/secrets" \ + DUMPS_DIR="/dumps" \ + PASSWORD_FILE="${HOME_DIR}/passwordFile" \ + ADMIN_USER="admin" \ + ADMIN_PASSWORD="admin" \ + DOMAIN_NAME="domain1" \ + PAYARA_ARGS="" +ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ + DOMAIN_DIR="${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}" \ + DEPLOY_PROPS="" \ + PREBOOT_COMMANDS="${CONFIG_DIR}/pre-boot-commands.asadmin" \ + POSTBOOT_COMMANDS="${CONFIG_DIR}/post-boot-commands.asadmin" \ + JVM_ARGS="" \ + MEM_MAX_RAM_PERCENTAGE="70.0" \ + MEM_XSS="512k" \ + # Source: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations + MEM_MIN_HEAP_FREE_RATIO="20" \ + MEM_MAX_HEAP_FREE_RATIO="40" \ + MEM_MAX_GC_PAUSE_MILLIS="500" \ + MEM_METASPACE_SIZE="256m" \ + MEM_MAX_METASPACE_SIZE="2g" \ + # Make heap dumps on OOM appear in DUMPS_DIR + ENABLE_DUMPS=0 \ + JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" + +ARG ESH_VERSION=0.3.1 +ARG ESH_CHECKSUM="1e0bd783f930cba13d6708b11c1ac844bbb1eddd02ac1666fc10d47eb9517bd7" +ARG JATTACH_VERSION="v2.0" +ARG JATTACH_CHECKSUM="989dc53279c7fb3ec399dbff1692647439286e5a4339c2849fd4323e998af7f8" +ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" +ARG ASADMIN="${PAYARA_DIR}/bin/asadmin --user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE}" + +### PART 1: SYSTEM ### +USER root +WORKDIR / +SHELL ["/bin/bash", "-euo", "pipefail", "-c"] +RUN true && \ + # Create pathes + mkdir -p "${HOME_DIR}" "${PAYARA_DIR}" "${DEPLOY_DIR}" "${CONFIG_DIR}" "${SCRIPT_DIR}" && \ + mkdir -p "${DOCROOT_DIR}" "${SECRETS_DIR}" "${DUMPS_DIR}" && \ + # Create user + addgroup --gid 1000 payara && \ + adduser --system --uid 1000 --no-create-home --shell /bin/bash --home "${HOME_DIR}" --gecos "" --ingroup payara payara && \ + echo payara:payara | chpasswd && \ + # Set permissions + chown -R payara: "${HOME_DIR}" && \ + chown -R payara: "${DOCROOT_DIR}" "${SECRETS_DIR}" "${DUMPS_DIR}" + +# Installing the packages in an extra container layer for better caching +RUN true && \ + # Install packages + apt-get update -q && \ + apt-get install -qqy --no-install-recommends ${PKGS} && \ + # Download & check esh template script + curl -sSfL -o /usr/bin/esh "https://raw.githubusercontent.com/jirutka/esh/v${ESH_VERSION}/esh" && \ + echo "${ESH_CHECKSUM} /usr/bin/esh" | sha256sum -c - && \ + chmod +x /usr/bin/esh && \ + # Install jattach + curl -sSfL -o /usr/bin/jattach "https://github.com/apangin/jattach/releases/download/${JATTACH_VERSION}/jattach" && \ + echo "${JATTACH_CHECKSUM} /usr/bin/jattach" | sha256sum -c - && \ + chmod +x /usr/bin/jattach && \ + # Cleanup + rm -rf "/var/lib/apt/lists/*" + +### PART 2: PAYARA ### +# After setting up system, now configure Payara +USER payara +WORKDIR ${HOME_DIR} + +# Copy Payara from build context (cached by Maven) +COPY --chown=payara:payara 
maven/appserver ${PAYARA_DIR}/ + +# Copy the system (appserver level) scripts like entrypoint, etc +COPY --chown=payara:payara maven/scripts ${SCRIPT_DIR}/ + +# Configure the domain to be container and production ready +RUN true && \ + # Set admin password + echo "AS_ADMIN_PASSWORD=" > /tmp/password-change-file.txt && \ + echo "AS_ADMIN_NEWPASSWORD=${ADMIN_PASSWORD}" >> /tmp/password-change-file.txt && \ + echo "AS_ADMIN_PASSWORD=${ADMIN_PASSWORD}" >> ${PASSWORD_FILE} && \ + asadmin --user=${ADMIN_USER} --passwordfile=/tmp/password-change-file.txt change-admin-password --domain_name=${DOMAIN_NAME} && \ + # Start domain for configuration + ${ASADMIN} start-domain ${DOMAIN_NAME} && \ + # Allow access to admin with password only + ${ASADMIN} enable-secure-admin && \ + ### CONTAINER USAGE ENABLEMENT + # List & delete memory settings from domain + for MEMORY_JVM_OPTION in $(${ASADMIN} list-jvm-options | grep "Xm[sx]\|Xss\|NewRatio"); \ + do \ + ${ASADMIN} delete-jvm-options $(echo $MEMORY_JVM_OPTION | sed -e 's/:/\\:/g'); \ + done && \ + # Tweak memory settings for containers + ${ASADMIN} create-jvm-options "-XX\:+UseContainerSupport" && \ + ${ASADMIN} create-jvm-options "-XX\:MaxRAMPercentage=\${ENV=MEM_MAX_RAM_PERCENTAGE}" && \ + ${ASADMIN} create-jvm-options "-Xss\${ENV=MEM_XSS}" && \ + ${ASADMIN} create-jvm-options "-XX\:MinHeapFreeRatio=\${ENV=MEM_MIN_HEAP_FREE_RATIO}" && \ + ${ASADMIN} create-jvm-options "-XX\:MaxHeapFreeRatio=\${ENV=MEM_MAX_HEAP_FREE_RATIO}" && \ + ${ASADMIN} create-jvm-options "-XX\:HeapDumpPath=\${ENV=DUMPS_DIR}" && \ + # Set logging to console only for containers + ${ASADMIN} set-log-attributes com.sun.enterprise.server.logging.GFFileHandler.logtoFile=false && \ + ### PRODUCTION READINESS + ${ASADMIN} create-jvm-options '-XX\:+UseG1GC' && \ + ${ASADMIN} create-jvm-options '-XX\:+UseStringDeduplication' && \ + ${ASADMIN} create-jvm-options '-XX\:MaxGCPauseMillis=${ENV=MEM_MAX_GC_PAUSE_MILLIS}' && \ + ${ASADMIN} create-jvm-options '-XX\:MetaspaceSize=${ENV=MEM_METASPACE_SIZE}' && \ + ${ASADMIN} create-jvm-options '-XX\:MaxMetaspaceSize=${ENV=MEM_MAX_METASPACE_SIZE}' && \ + ${ASADMIN} create-jvm-options '-XX\:+IgnoreUnrecognizedVMOptions' && \ + # Enlarge thread pools + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-thread-pool-size="50" && \ + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-queue-size="" && \ + ${ASADMIN} set default-config.thread-pools.thread-pool.thread-pool-1.max-thread-pool-size="250" && \ + # Enable file caching + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" && \ + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" && \ + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" && \ + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" && \ + # Enlarge EJB pools (cannot do this for server-config as set does not create new entries) + ${ASADMIN} set default-config.ejb-container.max-pool-size="128" && \ + # Misc settings + ${ASADMIN} create-system-properties fish.payara.classloading.delegate="false" && \ + ${ASADMIN} create-system-properties jersey.config.client.readTimeout="300000" && \ + ${ASADMIN} create-system-properties jersey.config.client.connectTimeout="300000" && \ + ### DATAVERSE APPLICATION SPECIFICS + # Configure the MicroProfile directory config source 
to point to /secrets
+    ${ASADMIN} set-config-dir --directory="${SECRETS_DIR}" && \
+    # Make request timeouts configurable via MPCONFIG (default to 900 secs = 15 min)
+    ${ASADMIN} set 'server-config.network-config.protocols.protocol.http-listener-1.http.request-timeout-seconds=${MPCONFIG=dataverse.http.timeout:900}' && \
+    # TODO: what of the below 3 items can be deleted for container usage?
+    ${ASADMIN} create-network-listener --protocol=http-listener-1 --listenerport=8009 --jkenabled=true jk-connector && \
+    ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled=true && \
+    ${ASADMIN} create-system-properties javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl && \
+    # Always disable phoning home...
+    ${ASADMIN} disable-phone-home && \
+    ### CLEANUP
+    # Stop domain
+    ${ASADMIN} stop-domain "${DOMAIN_NAME}" && \
+    # Delete generated files
+    rm -rf \
+        "/tmp/password-change-file.txt" \
+        "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/osgi-cache" \
+        "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/logs"
+
+# Make docroot of Payara reside in higher level directory for easier targeting
+# Due to gdcc/dataverse-kubernetes#177: create the generated pathes so they are
+# writeable by us. TBR with gdcc/dataverse-kubernetes#178.
+RUN rm -rf "${DOMAIN_DIR}"/docroot && \
+    ln -s "${DOCROOT_DIR}" "${DOMAIN_DIR}"/docroot && \
+    mkdir -p "${DOMAIN_DIR}"/generated/jsp/dataverse
+
+# Set the entrypoint to tini (as a process supervisor)
+ENTRYPOINT ["/usr/bin/tini", "--"]
+# JSON syntax should be used, but bypassed shell. Thus re-add expansion via shell exec.
+CMD ["sh", "-c", "${SCRIPT_DIR}/entrypoint.sh"]
+
+LABEL org.opencontainers.image.created="@git.build.time@" \
+      org.opencontainers.image.authors="Research Data Management at FZJ <forschungsdaten@fz-juelich.de>" \
+      org.opencontainers.image.url="https://k8s-docs.gdcc.io" \
+      org.opencontainers.image.documentation="https://k8s-docs.gdcc.io" \
+      org.opencontainers.image.source="https://github.com/gdcc/dataverse/tree/develop%2Bct/modules/container-base" \
+      org.opencontainers.image.version="@project.version@" \
+      org.opencontainers.image.revision="@git.commit.id.abbrev@" \
+      org.opencontainers.image.vendor="Global Dataverse Community Consortium" \
+      org.opencontainers.image.licenses="Apache-2.0" \
+      org.opencontainers.image.title="dataverse-k8s :: Dataverse containerized" \
+      org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software"

diff --git a/modules/container-base/src/main/docker/assembly.xml b/modules/container-base/src/main/docker/assembly.xml new file mode 100644 index 00000000000..afd5530fa60 --- /dev/null +++ b/modules/container-base/src/main/docker/assembly.xml @@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.1.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.1.0 http://maven.apache.org/xsd/assembly-2.1.0.xsd">
+    <fileSets>
+        <fileSet>
+            <directory>${project.basedir}/target/payara5</directory>
+            <outputDirectory>appserver</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>${project.basedir}/src/main/docker/scripts</directory>
+            <outputDirectory>scripts</outputDirectory>
+            <fileMode>0755</fileMode>
+        </fileSet>
+    </fileSets>
+</assembly>
\ No newline at end of file

diff --git a/modules/container-base/src/main/docker/scripts/entrypoint.sh b/modules/container-base/src/main/docker/scripts/entrypoint.sh new file mode 100644 index 00000000000..6f71dfe013c --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/entrypoint.sh @@ -0,0 +1,17 @@
+#!/bin/bash
+##########################################################################################################
+#
+# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/extras/docker-images/
+# server-full/src/main/docker/bin/entrypoint.sh and licensed under CDDL 1.1 by
the Payara Foundation. +# +########################################################################################################## + +for f in "${SCRIPT_DIR}"/init_* "${SCRIPT_DIR}"/init.d/*; do + case "$f" in + *.sh) echo "[Entrypoint] running $f"; . "$f" ;; + *) echo "[Entrypoint] ignoring $f" ;; + esac + echo +done + +exec "${SCRIPT_DIR}"/startInForeground.sh "${PAYARA_ARGS}" diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh new file mode 100644 index 00000000000..e2d717af666 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh @@ -0,0 +1,65 @@ +#!/bin/bash +########################################################################################################## +# +# A script to append deploy commands to the post boot command file at +# $PAYARA_HOME/scripts/post-boot-commands.asadmin file. All applications in the +# $DEPLOY_DIR (either files or folders) will be deployed. +# The $POSTBOOT_COMMANDS file can then be used with the start-domain using the +# --postbootcommandfile parameter to deploy applications on startup. +# +# Usage: +# ./generate_deploy_commands.sh +# +# Optionally, any number of parameters of the asadmin deploy command can be +# specified as parameters to this script. +# E.g., to deploy applications with implicit CDI scanning disabled: +# +# ./generate_deploy_commands.sh --properties=implicitCdiEnabled=false +# +# Environment variables used: +# - $PREBOOT_COMMANDS - the pre boot command file. +# - $POSTBOOT_COMMANDS - the post boot command file. +# +# Note that many parameters to the deploy command can be safely used only when +# a single application exists in the $DEPLOY_DIR directory. +# +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/extras/docker-images/ +# server-full/src/main/docker/bin/init_1_generate_deploy_commands.sh and licensed under CDDL 1.1 +# by the Payara Foundation. +# +########################################################################################################## + +# Check required variables are set +if [ -z "$DEPLOY_DIR" ]; then echo "Variable DEPLOY_DIR is not set."; exit 1; fi +if [ -z "$PREBOOT_COMMANDS" ]; then echo "Variable PREBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$POSTBOOT_COMMANDS" ]; then echo "Variable POSTBOOT_COMMANDS is not set."; exit 1; fi + +# Create pre and post boot command files if they don't exist +touch "$POSTBOOT_COMMANDS" +touch "$PREBOOT_COMMANDS" + +deploy() { + + if [ -z "$1" ]; then + echo "No deployment specified"; + exit 1; + fi + + DEPLOY_STATEMENT="deploy $DEPLOY_PROPS $1" + if grep -q "$1" "$POSTBOOT_COMMANDS"; then + echo "post boot commands already deploys $1"; + else + echo "Adding deployment target $1 to post boot commands"; + echo "$DEPLOY_STATEMENT" >> "$POSTBOOT_COMMANDS"; + fi +} + +# RAR files first +find "$DEPLOY_DIR" -mindepth 1 -maxdepth 1 -name "*.rar" -print0 \ + | while IFS= read -r -d '' file; do deploy "$file"; done + +# Then every other WAR, EAR, JAR or directory +find "$DEPLOY_DIR" -mindepth 1 -maxdepth 1 ! 
-name "*.rar" -a -name "*.war" -o -name "*.ear" -o -name "*.jar" -o -type d -print0 \ + | while IFS= read -r -d '' file; do deploy "$file"; done \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/startInForeground.sh b/modules/container-base/src/main/docker/scripts/startInForeground.sh new file mode 100644 index 00000000000..4843f6ae055 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/startInForeground.sh @@ -0,0 +1,89 @@ +#!/bin/bash +########################################################################################################## +# +# This script is to execute Payara Server in foreground, mainly in a docker environment. +# It allows to avoid running 2 instances of JVM, which happens with the start-domain --verbose command. +# +# Usage: +# Running +# startInForeground.sh +# is equivalent to running +# asadmin start-domain +# +# It's possible to use any arguments of the start-domain command as arguments to startInForeground.sh +# +# Environment variables used: +# - $ADMIN_USER - the username to use for the asadmin utility. +# - $PASSWORD_FILE - the password file to use for the asadmin utility. +# - $PREBOOT_COMMANDS - the pre boot command file. +# - $POSTBOOT_COMMANDS - the post boot command file. +# - $DOMAIN_NAME - the name of the domain to start. +# - $JVM_ARGS - extra JVM options to pass to the Payara Server instance. +# - $AS_ADMIN_MASTERPASSWORD - the master password for the Payara Server instance. +# +# This script executes the asadmin tool which is expected at ~/appserver/bin/asadmin. +# +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/ +# extras/docker-images/server-full/src/main/docker/bin/startInForeground.sh and licensed under CDDL 1.1 +# by the Payara Foundation. +# +########################################################################################################## + +# Check required variables are set +if [ -z "$ADMIN_USER" ]; then echo "Variable ADMIN_USER is not set."; exit 1; fi +if [ -z "$PASSWORD_FILE" ]; then echo "Variable PASSWORD_FILE is not set."; exit 1; fi +if [ -z "$PREBOOT_COMMANDS" ]; then echo "Variable PREBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$POSTBOOT_COMMANDS" ]; then echo "Variable POSTBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$DOMAIN_NAME" ]; then echo "Variable DOMAIN_NAME is not set."; exit 1; fi + +# Check if dumps are enabled - add arg to JVM_ARGS in this case +if [ -n "${ENABLE_DUMPS}" ] && [ "${ENABLE_DUMPS}" = "1" ]; then + JVM_ARGS="${JVM_DUMPS_ARG} ${JVM_ARGS}" +fi + +# The following command gets the command line to be executed by start-domain +# - print the command line to the server with --dry-run, each argument on a separate line +# - remove -read-string argument +# - surround each line except with parenthesis to allow spaces in paths +# - remove lines before and after the command line and squash commands on a single line + +# Create pre and post boot command files if they don't exist +touch "$POSTBOOT_COMMANDS" +touch "$PREBOOT_COMMANDS" + +# shellcheck disable=SC2068 +# -- Using $@ is necessary here as asadmin cannot deal with options enclosed in ""! +OUTPUT=$("${PAYARA_DIR}"/bin/asadmin --user="${ADMIN_USER}" --passwordfile="${PASSWORD_FILE}" start-domain --dry-run --prebootcommandfile="${PREBOOT_COMMANDS}" --postbootcommandfile="${POSTBOOT_COMMANDS}" $@ "$DOMAIN_NAME") +STATUS=$? 
+if [ "$STATUS" -ne 0 ] + then + echo ERROR: "$OUTPUT" >&2 + exit 1 +fi + +COMMAND=$(echo "$OUTPUT"\ + | sed -n -e '2,/^$/p'\ + | sed "s|glassfish.jar|glassfish.jar $JVM_ARGS |g") + +echo Executing Payara Server with the following command line: +echo "$COMMAND" | tr ' ' '\n' +echo + +# Run the server in foreground - read master password from variable or file or use the default "changeit" password + +set +x +if test "$AS_ADMIN_MASTERPASSWORD"x = x -a -f "$PASSWORD_FILE" + then + # shellcheck disable=SC1090 + source "$PASSWORD_FILE" +fi +if test "$AS_ADMIN_MASTERPASSWORD"x = x + then + AS_ADMIN_MASTERPASSWORD=changeit +fi +echo "AS_ADMIN_MASTERPASSWORD=$AS_ADMIN_MASTERPASSWORD" > /tmp/masterpwdfile +# shellcheck disable=SC2086 +# -- Unquoted exec var is necessary, as otherwise things get escaped that may not be escaped (parameters for Java) +exec ${COMMAND} < /tmp/masterpwdfile diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 14b84f80279..4db2232be7d 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -182,6 +182,10 @@ 3.0.0-M5 3.3.0 3.1.2 + + + 0.39.1 + ghcr.io @@ -244,6 +248,11 @@ + + io.fabric8 + docker-maven-plugin + ${fabric8-dmp.version} + @@ -315,4 +324,46 @@ --> + + + ct + + + 5.2022.1 + + + + + + + io.github.git-commit-id + git-commit-id-maven-plugin + 5.0.0 + + + retrieve-git-details + + revision + + initialize + + + + ${project.basedir}/../../.git + UTC + 8 + false + + + + + + + + From 2319a4787e0c4e41b633382ed7c9684130933be8 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 16 Jun 2022 21:22:41 +0200 Subject: [PATCH 168/608] feat(ct-base): remove the esh tool Will be replaced with a capability to make API endpoints for authentication providers read from MPCONFIG sources. 
---
 modules/container-base/src/main/docker/Dockerfile | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile
index 635fbd89142..491c0747ada 100644
--- a/modules/container-base/src/main/docker/Dockerfile
+++ b/modules/container-base/src/main/docker/Dockerfile
@@ -61,8 +61,6 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \
     ENABLE_DUMPS=0 \
     JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError"
 
-ARG ESH_VERSION=0.3.1
-ARG ESH_CHECKSUM="1e0bd783f930cba13d6708b11c1ac844bbb1eddd02ac1666fc10d47eb9517bd7"
 ARG JATTACH_VERSION="v2.0"
 ARG JATTACH_CHECKSUM="989dc53279c7fb3ec399dbff1692647439286e5a4339c2849fd4323e998af7f8"
 ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini"
@@ -89,10 +87,6 @@ RUN true && \
     # Install packages
     apt-get update -q && \
     apt-get install -qqy --no-install-recommends ${PKGS} && \
-    # Download & check esh template script
-    curl -sSfL -o /usr/bin/esh "https://raw.githubusercontent.com/jirutka/esh/v${ESH_VERSION}/esh" && \
-    echo "${ESH_CHECKSUM} /usr/bin/esh" | sha256sum -c - && \
-    chmod +x /usr/bin/esh && \
     # Install jattach
     curl -sSfL -o /usr/bin/jattach "https://github.com/apangin/jattach/releases/download/${JATTACH_VERSION}/jattach" && \
     echo "${JATTACH_CHECKSUM} /usr/bin/jattach" | sha256sum -c - && \

From f0202cb2c177c5ebeeb176c58c8b27256d32697b Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Fri, 17 Jun 2022 10:29:37 +0200
Subject: [PATCH 169/608] chore(deps): update container plugin and payara
 version for containers

---
 modules/dataverse-parent/pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml
index 4db2232be7d..fa693f8a8ac 100644
--- a/modules/dataverse-parent/pom.xml
+++ b/modules/dataverse-parent/pom.xml
@@ -184,7 +184,7 @@
         <maven-checkstyle-plugin.version>3.1.2</maven-checkstyle-plugin.version>
 
-        <fabric8-dmp.version>0.39.1</fabric8-dmp.version>
+        <fabric8-dmp.version>0.40.1</fabric8-dmp.version>
 
         <ct.registry>ghcr.io</ct.registry>
@@ -334,7 +334,7 @@
                     See also: https://github.com/IQSS/dataverse/issues/8048
                     See also: https://github.com/payara/Payara/issues/5368
                 -->
-                <payara.version>5.2022.1</payara.version>
+                <payara.version>5.2022.2</payara.version>

From 2dc0596d8634cadecb691b95a39ba5a3355fcd99 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Thu, 11 Aug 2022 13:54:41 +0200
Subject: [PATCH 170/608] fix(ct-base): unpack Payara to target/payara

Payara 5 defaults to a "payara5" topmost dir, Payara 6 to "payara6". To avoid
adding different directories in the assembly, cut the number from the
directory's name when unpacking.

This does not prevent you from doing stupid things like not cleaning before
switching the version, leading to an unknown state of old and new libs, etc.
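For illustration, the file mapper behaves roughly like this manual equivalent
(archive name exemplary):

    # what the dependency plugin's regex file mapper effectively does while unpacking:
    unzip -q payara-5.2022.2.zip -d target/
    mv target/payara5 target/payara   # a future payara6 would be moved the same way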
---
 modules/container-base/pom.xml                      | 6 ++++++
 modules/container-base/src/main/docker/assembly.xml | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml
index 8cb7e1ac795..765a4c72843 100644
--- a/modules/container-base/pom.xml
+++ b/modules/container-base/pom.xml
@@ -51,6 +51,12 @@
                                     <type>zip</type>
                                     <overWrite>false</overWrite>
                                     <outputDirectory>${project.build.directory}</outputDirectory>
+                                    <fileMappers>
+                                        <org.codehaus.plexus.components.io.filemappers.RegExpFileMapper>
+                                            <pattern>^payara\d</pattern>
+                                            <replacement>payara</replacement>
+                                        </org.codehaus.plexus.components.io.filemappers.RegExpFileMapper>
+                                    </fileMappers>
diff --git a/modules/container-base/src/main/docker/assembly.xml b/modules/container-base/src/main/docker/assembly.xml
index afd5530fa60..9fc62d49fa1 100644
--- a/modules/container-base/src/main/docker/assembly.xml
+++ b/modules/container-base/src/main/docker/assembly.xml
@@ -4,7 +4,7 @@
     <fileSets>
         <fileSet>
-            <directory>${project.basedir}/target/payara5</directory>
+            <directory>${project.basedir}/target/payara</directory>
             <outputDirectory>appserver</outputDirectory>
         </fileSet>
         <fileSet>

From 246f8b8cbfd18356c6f2cb63481d1fa02afad390 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Thu, 11 Aug 2022 14:03:19 +0200
Subject: [PATCH 171/608] fix(ct-base): migrate base image from OpenJDK to
 Eclipse Temurin

There was an ongoing discussion that the Docker Hub image "openjdk" is not
backed by any officially supported project, but only by the goodwill of Oracle
shipping their JRE/JDK. There is no "real" release of OpenJDK. There exist only
real distributions like Oracle JDK, Eclipse Temurin, Azul JDK, AWS Corretto
etc. (see https://whichjdk.com). For this reason the "openjdk" image has been
deprecated; switching to Eclipse Temurin JRE here.

See also: https://github.com/docker-library/openjdk/issues/505
---
 modules/container-base/pom.xml                    | 2 +-
 modules/container-base/src/main/docker/Dockerfile | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml
index 765a4c72843..5ebaa9ea323 100644
--- a/modules/container-base/pom.xml
+++ b/modules/container-base/pom.xml
@@ -78,7 +78,7 @@
                         <dockerFile>Dockerfile</dockerFile>
                         <args>
-                            <BASE_IMAGE>openjdk:${target.java.version}-jre</BASE_IMAGE>
+                            <BASE_IMAGE>eclipse-temurin:${target.java.version}-jre</BASE_IMAGE>
                         </args>
                         <filter>@</filter>
diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile
index 491c0747ada..2fed83db59f 100644
--- a/modules/container-base/src/main/docker/Dockerfile
+++ b/modules/container-base/src/main/docker/Dockerfile
@@ -20,7 +20,7 @@
 #
 
 # Make the Java base image and version configurable (useful for trying newer Java versions and flavors)
-ARG BASE_IMAGE="openjdk:11-jre"
+ARG BASE_IMAGE="eclipse-temurin:11-jre"
 FROM $BASE_IMAGE
 
 # Default payara ports to expose

From 76ea50871bafe028d1edad35f441e7731398ed00 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Thu, 11 Aug 2022 14:05:06 +0200
Subject: [PATCH 172/608] chore(deps): update Docker Maven Plugin to the latest
 release

---
 modules/dataverse-parent/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml
index fa693f8a8ac..eaa09b61bd7 100644
--- a/modules/dataverse-parent/pom.xml
+++ b/modules/dataverse-parent/pom.xml
@@ -184,7 +184,7 @@
         <maven-checkstyle-plugin.version>3.1.2</maven-checkstyle-plugin.version>
 
-        <fabric8-dmp.version>0.40.1</fabric8-dmp.version>
+        <fabric8-dmp.version>0.40.2</fabric8-dmp.version>
 
         <ct.registry>ghcr.io</ct.registry>

From f62dee2ec6a5dd237e2fbc10346bdebeb6a3c2f1 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Thu, 11 Aug 2022 15:13:48 +0200
Subject: [PATCH 173/608] feat(ct-base): enable multiarch image build via
 docker buildx

With the rise of Apple M1/M2 silicon, we need to provide ARM64-based images in
addition to AMD64.
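Under the hood the Maven plugin drives Docker BuildKit; a hand-rolled
equivalent of the multi-arch build would look roughly like this (builder and
image names exemplary):

    docker buildx create --name multiarch --use   # one-time builder setup
    docker buildx build --platform linux/amd64,linux/arm64 -t example/base:multiarch .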
---
 modules/container-base/pom.xml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml
index 5ebaa9ea323..add8a120a58 100644
--- a/modules/container-base/pom.xml
+++ b/modules/container-base/pom.xml
@@ -76,6 +76,12 @@
                         <name>%g/base:jdk${target.java.version}</name>
                         <registry>${ct.registry}</registry>
+                        <buildx>
+                            <platforms>
+                                <platform>linux/arm64</platform>
+                                <platform>linux/amd64</platform>
+                            </platforms>
+                        </buildx>
                         <dockerFile>Dockerfile</dockerFile>
                         <args>
                             <BASE_IMAGE>eclipse-temurin:${target.java.version}-jre</BASE_IMAGE>

From 72935d481e1e1ab260e763a000bfef172629cc16 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Fri, 12 Aug 2022 12:08:13 +0200
Subject: [PATCH 174/608] chore(ct-base): add maintainer details to POM

---
 modules/container-base/pom.xml | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml
index add8a120a58..015ebba598d 100644
--- a/modules/container-base/pom.xml
+++ b/modules/container-base/pom.xml
@@ -16,6 +16,18 @@
     <name>Container Base Image</name>
     <description>This module provides an application server base image to be decorated with the Dataverse app.</description>
 
+    <developers>
+        <developer>
+            <id>poikilotherm</id>
+            <name>Oliver Bertuch</name>
+            <email>github@bertuch.eu</email>
+            <timezone>Europe/Berlin</timezone>
+            <roles>
+                <role>maintainer</role>
+            </roles>
+        </developer>
+    </developers>
+

From 17d8b53bb985fc77faebc8273b84012fac2bb525 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Fri, 12 Aug 2022 12:09:21 +0200
Subject: [PATCH 175/608] docs(ct-base): update OCI tag labels

---
 modules/container-base/src/main/docker/Dockerfile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile
index 2fed83db59f..036e2f17831 100644
--- a/modules/container-base/src/main/docker/Dockerfile
+++ b/modules/container-base/src/main/docker/Dockerfile
@@ -187,12 +187,12 @@
 
 LABEL org.opencontainers.image.created="@git.build.time@" \
       org.opencontainers.image.authors="Research Data Management at FZJ " \
-      org.opencontainers.image.url="https://k8s-docs.gdcc.io" \
-      org.opencontainers.image.documentation="https://k8s-docs.gdcc.io" \
-      org.opencontainers.image.source="https://github.com/gdcc/dataverse/tree/develop%2Bct/modules/container-base" \
+      org.opencontainers.image.url="https://guides.dataverse.org/en/latest/container/" \
+      org.opencontainers.image.documentation="https://guides.dataverse.org/en/latest/container/" \
+      org.opencontainers.image.source="https://github.com/IQSS/dataverse/tree/develop/modules/container-base" \
       org.opencontainers.image.version="@project.version@" \
       org.opencontainers.image.revision="@git.commit.id.abbrev@" \
       org.opencontainers.image.vendor="Global Dataverse Community Consortium" \
       org.opencontainers.image.licenses="Apache-2.0" \
-      org.opencontainers.image.title="dataverse-k8s :: Dataverse containerized" \
+      org.opencontainers.image.title="Dataverse Base Image" \
       org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software"

From 0a9947bd6868b9b45314b6fe0cfc918c48ed4eeb Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Fri, 12 Aug 2022 12:11:14 +0200
Subject: [PATCH 176/608] feat(ct-base): add debug/develop mode script

---
 .../container-base/src/main/docker/Dockerfile      |  4 +-
 .../scripts/init_1_generate_devmode_commands.sh    | 61 +++++++++++++++++++
 2 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh

diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile
index 036e2f17831..fe44fc61847 100644
--- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -59,7 +59,9 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ MEM_MAX_METASPACE_SIZE="2g" \ # Make heap dumps on OOM appear in DUMPS_DIR ENABLE_DUMPS=0 \ - JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" + JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" \ + ENABLE_JMX=0 \ + ENABLE_JDWP=0 ARG JATTACH_VERSION="v2.0" ARG JATTACH_CHECKSUM="989dc53279c7fb3ec399dbff1692647439286e5a4339c2849fd4323e998af7f8" diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh new file mode 100644 index 00000000000..9d71e3bb81b --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh @@ -0,0 +1,61 @@ +#!/bin/bash + +set -euo pipefail + +###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### +# This script enables different development options, like a JMX connector +# usable with VisualVM, JRebel hot-reload support and JDWP debugger service. +# Enable it by adding env vars on startup (e.g. via ConfigMap) +# +# As this script is "sourced" from entrypoint.sh, we can manipulate env vars +# for the parent shell before executing Payara. +###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### + +# 0. Init variables +ENABLE_JMX=${ENABLE_JMX:-0} +ENABLE_JDWP=${ENABLE_JDWP:-0} + +DV_PREBOOT=${PAYARA_DIR}/dataverse_preboot +echo "# Dataverse preboot configuration for Payara" > "${DV_PREBOOT}" + +# 1. Configure JMX (enabled by default on port 8686, but requires SSL) +# See also https://blog.payara.fish/monitoring-payara-server-with-jconsole +# To still use it, you can use a sidecar container proxying or using JMX via localhost without SSL. +if [ "${ENABLE_JMX}" = "1" ]; then + echo "Enabling unsecured JMX on 0.0.0.0:8686. You'll need a sidecar for this, as access is allowed from same machine only (without SSL)." 
+ { \ + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jvm=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.connector-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.connector-connection-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jdbc-connection-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.web-services-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.ejb-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.thread-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.http-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.security=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jms-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jersey=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.transaction-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jpa=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.web-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.orb=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.deployment=HIGH" + #echo "set configs.config.server-config.admin-service.jmx-connector.system.address=127.0.0.1" + echo "set configs.config.server-config.admin-service.jmx-connector.system.security-enabled=false" + } >> "${DV_PREBOOT}" +fi + +# 2. Enable JDWP via debugging switch +if [ "${ENABLE_JDWP}" = "1" ]; then + echo "Enabling JDWP remote debugging support via asadmin debugging switch." + export PAYARA_ARGS="${PAYARA_ARGS} --debug=true" +fi + +# 3. 
Add the commands to the existing preboot file, but insert BEFORE deployment +TMP_PREBOOT=$(mktemp) +cat "${DV_PREBOOT}" "${PREBOOT_COMMANDS}" > "${TMP_PREBOOT}" +mv "${TMP_PREBOOT}" "${PREBOOT_COMMANDS}" +echo "DEBUG: preboot contains the following commands:" +echo "--------------------------------------------------" +cat "${PREBOOT_COMMANDS}" +echo "--------------------------------------------------" \ No newline at end of file From 2e812dcc15413d5814072b86971b924ee13824e4 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 22 Aug 2022 13:50:48 +0200 Subject: [PATCH 177/608] deps(ct-base): update to jattach v2.1 --- modules/container-base/src/main/docker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index fe44fc61847..d13808c3272 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -63,8 +63,8 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ ENABLE_JMX=0 \ ENABLE_JDWP=0 -ARG JATTACH_VERSION="v2.0" -ARG JATTACH_CHECKSUM="989dc53279c7fb3ec399dbff1692647439286e5a4339c2849fd4323e998af7f8" +ARG JATTACH_VERSION="v2.1" +ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" ARG ASADMIN="${PAYARA_DIR}/bin/asadmin --user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE}" From 7e836c70dd44a538bf1fdd0d73045730da053951 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 23 Aug 2022 00:10:24 +0200 Subject: [PATCH 178/608] chore(ct-base): add JMX to exposed ports and make it default enabled as in Payara --- modules/container-base/src/main/docker/Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index d13808c3272..ba459607826 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -25,10 +25,11 @@ FROM $BASE_IMAGE # Default payara ports to expose # 4848: admin console -# 9009: debug port (JPDA) +# 9009: debug port (JDWP) # 8080: http # 8181: https -EXPOSE 4848 9009 8080 8181 +# 8686: JMX +EXPOSE 4848 9009 8080 8181 8686 ENV HOME_DIR="/opt/payara" ENV PAYARA_DIR="${HOME_DIR}/appserver" \ @@ -60,7 +61,7 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ # Make heap dumps on OOM appear in DUMPS_DIR ENABLE_DUMPS=0 \ JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" \ - ENABLE_JMX=0 \ + ENABLE_JMX=1 \ ENABLE_JDWP=0 ARG JATTACH_VERSION="v2.1" From fe7b2d06148e6a2e6d6b2939f366de9ea2162cff Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 23 Aug 2022 00:11:08 +0200 Subject: [PATCH 179/608] docs(ct): add container guide to guides index --- doc/sphinx-guides/source/container/index.rst | 26 ++++++++++++++++++++ doc/sphinx-guides/source/index.rst | 7 ++++-- 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 doc/sphinx-guides/source/container/index.rst diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst new file mode 100644 index 00000000000..1bf86f16f43 --- /dev/null +++ b/doc/sphinx-guides/source/container/index.rst @@ -0,0 +1,26 @@ +Container Guide +=============== + +**Contents:** + +.. 
toctree::
+
+   base-image
+   app-image
+
+Running Dataverse software in containers is quite different from a :doc:`classic installation <../installation/prep>`.
+
+Both approaches have pros and cons. These days (2022) containers are very often used for development and testing,
+but there is an ever-growing trend toward running applications in the cloud using container technology.
+
+**NOTE:**
+**As the "Institute for Quantitative Social Sciences" at Harvard is running their installations in the classic
+deployment way, the container support is mostly created and maintained by the Dataverse community.**
+
+This guide is *not* about installing on container orchestration technology like Docker Swarm, Kubernetes, Rancher or other
+solutions to run containers in production. There is the `Dataverse on K8s project `_ for this
+purpose.
+
+This guide focuses on describing the container images managed from the main Dataverse repository (again: by the
+community, not IQSS), their features and limitations. Instructions on how to build the images yourself, how to
+extend them and how to use them for development purposes can be found on the respective subpages.
\ No newline at end of file
diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst
index f7e81756e5b..f15a973544d 100755
--- a/doc/sphinx-guides/source/index.rst
+++ b/doc/sphinx-guides/source/index.rst
@@ -19,17 +19,20 @@ These documentation guides are for the |version| version of Dataverse. To find g
    installation/index
    developers/index
    style/index
+   container/index
 
 How the Guides Are Organized
 ----------------------------
 
 The guides are documentation that explain how to use Dataverse, which are divided into the following sections: User Guide,
-Installation Guide, Developer Guide, API Guide and Style Guide. The User Guide is further divided into primary activities: finding & using
+Installation Guide, Developer Guide, API Guide, Style Guide and Container Guide.
+The User Guide is further divided into primary activities: finding & using
 data, adding Datasets, administering dataverses or Datasets, and
 Dataset exploration/visualizations. Details on all of the above tasks can be found in the Users Guide. The
 Installation Guide is for people or organizations who want to host their
-own Dataverse. The Developer Guide contains instructions for
+own Dataverse. The Container Guide supplements this with information on container-based installations.
+The Developer Guide contains instructions for
 people who want to contribute to the Open Source Dataverse project or who want to modify the code to suit their own needs. Finally, the API Guide is for
 Developers that work on other applications and are interested in connecting with Dataverse through our APIs.

From a93dbbdb4c5d2cfed80a13f265238a59f551999a Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Tue, 23 Aug 2022 00:12:35 +0200
Subject: [PATCH 180/608] docs(ct-base): add extensive base image module
 documentation

---
 .../source/container/base-image.rst | 229 ++++++++++++++++++
 1 file changed, 229 insertions(+)
 create mode 100644 doc/sphinx-guides/source/container/base-image.rst

diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst
new file mode 100644
index 00000000000..4f441f79ad7
--- /dev/null
+++ b/doc/sphinx-guides/source/container/base-image.rst
@@ -0,0 +1,229 @@
+Application Base Image
+======================
+
+Within the main repository, you may find the base image's files at ``/modules/container-base``.
+This Maven module uses the `Maven Docker Plugin `_ to build and ship the image.
+
+Contents
+++++++++
+
+The base image provides:
+
+- `Eclipse Temurin JRE using Java 11 `_
+- `Payara Community Application Server `_
+- CLI tools necessary to run Dataverse (e.g. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in the Installation Guide)
+- Linux tools for analysis, monitoring and so on
+- `Jattach `_
+
+This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on:
+AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2).
+
+It inherits its Ubuntu environment from the upstream
+`base image of Eclipse Temurin `_.
+You are free to change the JRE/JDK image to your liking (see below).
+
+
+
+Build Instructions
+++++++++++++++++++
+
+Assuming you have `Docker `_, `Docker Desktop `_,
+`Moby `_ or some remote Docker host configured, up and running from here on.
+
+Simply execute the Maven module's packaging target with the "container" profile activated. Either from the project's Git root:
+
+``mvn -Pct -f modules/container-base package``
+
+Or move to the module and execute:
+
+``cd modules/container-base && mvn -Pct package``
+
+Some additional notes, using Maven parameters to change the build and use ...:
+
+- ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``.
+- | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...).
+  | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin!
+- ... a different Java Distribution: add ``-Ddocker.buildArg.BASE_IMAGE="name:tag"`` with precise reference to an
+  image available from local or remote (e. g. Docker Hub).
+
+
+
+Tunables
+++++++++
+
+The base image provides a Payara domain suited for production use, but can also be used during development.
+Many settings have been carefully selected for best performance and stability of the Dataverse application.
+
+As with any service, you should always monitor any metrics and make use of the tuning capabilities the base image
+provides. These are mostly based on environment variables (very common with containers) and provide sane defaults.
+
+.. list-table::
+   :align: left
+   :width: 100
+   :widths: 10 10 10 50
+   :header-rows: 1
+
+   * - Env. variable
+     - Default
+     - Type
+     - Description
+   * - ``DEPLOY_PROPS``
+     - (empty)
+     - String
+     - Set to add arguments to generated `asadmin deploy` commands.
+   * - ``PREBOOT_COMMANDS``
+     - [preboot]_
+     - Abs. path
+     - Provide path to file with ``asadmin`` commands to run **before** boot of application server.
+       See also `Pre/postboot script docs`_.
+   * - ``POSTBOOT_COMMANDS``
+     - [postboot]_
+     - Abs. path
+     - Provide path to file with ``asadmin`` commands to run **after** boot of application server.
+       See also `Pre/postboot script docs`_.
+   * - ``JVM_ARGS``
+     - (empty)
+     - String
+     - Additional arguments to pass to application server's JVM on start.
+   * - ``MEM_MAX_RAM_PERCENTAGE``
+     - ``70.0``
+     - Percentage
+     - Maximum amount of container's allocated RAM to be used as heap space.
+       Make sure to leave some room for native memory, OS overhead etc!
+   * - ``MEM_XSS``
+     - ``512k``
+     - Size
+     - Tune the maximum JVM stack size.
+   * - ``MEM_MIN_HEAP_FREE_RATIO``
+     - ``20``
+     - Integer
+     - Make the heap shrink aggressively and grow conservatively. See also `run-java-sh recommendations`_.
+   * - ``MEM_MAX_HEAP_FREE_RATIO``
+     - ``40``
+     - Integer
+     - Make the heap shrink aggressively and grow conservatively.
       See also `run-java-sh recommendations`_.
+   * - ``MEM_MAX_GC_PAUSE_MILLIS``
+     - ``500``
+     - Milliseconds
+     - Shorter pause times might result in lots of collections causing overhead without much gain.
+       This needs monitoring and tuning. It's a complex matter.
+   * - ``MEM_METASPACE_SIZE``
+     - ``256m``
+     - Size
+     - Initial size of memory reserved for class metadata, also used as a trigger to run a garbage collection
+       once passing this size.
+   * - ``MEM_MAX_METASPACE_SIZE``
+     - ``2g``
+     - Size
+     - The metaspace's size will not outgrow this limit.
+   * - ``ENABLE_DUMPS``
+     - ``0``
+     - Bool, ``0|1``
+     - If enabled, the argument(s) given in ``JVM_DUMPS_ARG`` will be added to the JVM starting up.
+       This means it will enable dumping the heap to ``${DUMPS_DIR}`` (see below) in "out of memory" cases.
+       (You should back this location with disk space / ramdisk, so it does not write into an overlay filesystem!)
+   * - ``JVM_DUMPS_ARG``
+     - [dump-option]_
+     - String
+     - Can be fine-tuned for more fine-grained control of dumping behaviour.
+   * - ``ENABLE_JMX``
+     - ``1``
+     - Bool, ``0|1``
+     - Enable JMX. Payara enables this by default and it is hard to deactivate.
+   * - ``ENABLE_JDWP``
+     - ``0``
+     - Bool, ``0|1``
+     - Enable the "Java Debug Wire Protocol" to attach a remote debugger to the JVM in this container.
+       Listens on port 9009 when enabled. Search the internet for numerous tutorials to use it.
+   * - ``DATAVERSE_HTTP_TIMEOUT``
+     - ``900``
+     - Seconds
+     - See :ref:`:ApplicationServerSettings` ``http.request-timeout-seconds``.
+
+       *Note:* can also be set using any other `MicroProfile Config Sources`_ available via ``dataverse.http.timeout``.
+
+
+.. [preboot] ``${CONFIG_DIR}/pre-boot-commands.asadmin``
+.. [postboot] ``${CONFIG_DIR}/post-boot-commands.asadmin``
+.. [dump-option] ``-XX:+HeapDumpOnOutOfMemoryError``
+
+
+
+Locations
++++++++++
+
+These environment variables represent certain locations and might be reused in your scripts etc.
+They aren't meant to be reconfigurable; they reflect the state of the filesystem layout!
+
+.. list-table::
+   :align: left
+   :width: 100
+   :widths: 10 10 50
+   :header-rows: 1
+
+   * - Env. variable
+     - Value
+     - Description
+   * - ``HOME_DIR``
+     - ``/opt/payara``
+     - Home base to Payara and the application
+   * - ``PAYARA_DIR``
+     - ``${HOME_DIR}/appserver``
+     - Installation directory of Payara server
+   * - ``SCRIPT_DIR``
+     - ``${HOME_DIR}/scripts``
+     - Any scripts like the container entrypoint, init scripts, etc.
+   * - ``CONFIG_DIR``
+     - ``${HOME_DIR}/config``
+     - Payara Server configurations like pre/postboot command files go here
+       (might be reused for Dataverse one day)
+   * - ``DEPLOY_DIR``
+     - ``${HOME_DIR}/deployments``
+     - Any EAR or WAR file, exploded WAR directory etc. is autodeployed on start
+   * - ``DOCROOT_DIR``
+     - ``/docroot``
+     - Mount a volume here to store i18n language bundle files, sitemaps, images for Dataverse collections, logos,
+       custom themes and stylesheets, etc. You might need to replicate this data or place it on shared file storage.
+   * - ``SECRETS_DIR``
+     - ``/secrets``
+     - Mount secrets or other configuration here; it is picked up automatically by the
+       `Directory Config Source `_.
+       See also various :doc:`../installation/config` options involving secrets.
+   * - ``DUMPS_DIR``
+     - ``/dumps``
+     - Default location where heap dumps will be stored (see above).
+       You should mount some storage here (disk or ephemeral).
+   * - ``DOMAIN_DIR``
+     - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}``
+     - Path to the root of the Payara domain that applications will be deployed into.
Usually ``${DOMAIN_NAME}`` will be ``domain1``. + + + +Exposed Ports ++++++++++++++ + +The default ports that are exposed by this image are: + +- 8080 - HTTP listener +- 8181 - HTTPS listener +- 4848 - Admin Service HTTPS listener +- 8686 - JMX listener +- 9009 - "Java Debug Wire Protocol" port (when ``ENABLE_JDWP=1``) + + + +Hints ++++++ + +By default, ``domain1`` is enabled to use the ``G1GC`` garbage collector. + +For running a Java application within a Linux based container, the support for CGroups is essential. It has been +included and activated by default since Java 8u192, Java 11 LTS and later. If you are interested in more details, +you can read about those in a few places like https://developers.redhat.com/articles/2022/04/19/java-17-whats-new-openjdks-container-awareness, +https://www.eclipse.org/openj9/docs/xxusecontainersupport, etc. The other memory defaults are inspired +from `run-java-sh recommendations`_. + + +.. _Pre/postboot script docs: https://docs.payara.fish/community/docs/Technical%20Documentation/Payara%20Micro%20Documentation/Payara%20Micro%20Configuration%20and%20Management/Micro%20Management/Asadmin%20Commands/Pre%20and%20Post%20Boot%20Commands.html +.. _MicroProfile Config Sources: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html +.. _run-java-sh recommendations: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations \ No newline at end of file From 67db02ff0249720c47e3025820c30fb6d737ec83 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 24 Aug 2022 15:08:30 +0200 Subject: [PATCH 181/608] docs(ct-base): remove reference to not (yet) existing docs page --- doc/sphinx-guides/source/container/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst index 1bf86f16f43..801ded7d0a5 100644 --- a/doc/sphinx-guides/source/container/index.rst +++ b/doc/sphinx-guides/source/container/index.rst @@ -6,7 +6,6 @@ Container Guide .. toctree:: base-image - app-image Running Dataverse software in containers is quite different than in a :doc:`classic installation <../installation/prep>`. From d5f80754e0ebf1ed56d34c1d7dbbe3d5fdc49b4a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 24 Aug 2022 17:38:29 +0200 Subject: [PATCH 182/608] docs(ct-base): add Docker Hub Eclipse Temurin tag search example --- doc/sphinx-guides/source/container/base-image.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 4f441f79ad7..4333bf38d5c 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -43,6 +43,7 @@ Some additional notes, using Maven parameters to change the build and use ...: - ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. - | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin! + (See also `Docker Hub search example `_) - ... a different Java Distribution: add ``-Ddocker.buildArg.BASE_IMAGE="name:tag"`` with precise reference to an image available from local or remote (e. g. Docker Hub). 
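As a quick sanity check for the tag search mentioned above, whether a chosen
``target.java.version`` resolves to a published Temurin tag can also be verified
from the command line - a sketch, assuming Docker Hub's v2 repository API:

    TAG="17-jre"
    curl -sf "https://hub.docker.com/v2/repositories/library/eclipse-temurin/tags/${TAG}" >/dev/null \
      && echo "eclipse-temurin:${TAG} exists" || echo "no such tag: ${TAG}"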
From 5e61241a27229fdbe7ce6fb7e84c520b609fdb33 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Fri, 26 Aug 2022 18:26:30 +0200
Subject: [PATCH 183/608] style(ct-base): incorporate requested changes by
 @pdurbin

- Change order of guides
- Remove unnecessary quotes from IQSS
- Add TOC to base image docs
- Add flag again about community support only to base image docs
---
 doc/sphinx-guides/source/container/base-image.rst | 14 ++++++++++++--
 doc/sphinx-guides/source/container/index.rst      |  5 +++--
 doc/sphinx-guides/source/index.rst                |  2 +-
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst
index 4333bf38d5c..ac64323eeea 100644
--- a/doc/sphinx-guides/source/container/base-image.rst
+++ b/doc/sphinx-guides/source/container/base-image.rst
@@ -1,11 +1,21 @@
 Application Base Image
 ======================
 
+.. contents:: |toctitle|
+   :local:
+
 Within the main repository, you may find the base image's files at ``/modules/container-base``.
 This Maven module uses the `Maven Docker Plugin `_ to build and ship the image.
 
-Contents
-++++++++
+**NOTE: This image is created, maintained and supported by the Dataverse community on a best-effort basis.**
+IQSS will not offer you support on how to deploy or run it; please reach out to the community for help on using it.
+You might be interested in taking a look at :doc:`../developers/containers`, which links you to some (community-based)
+efforts.
+
+
+
+Image Contents
+++++++++++++++
 
 The base image provides:
diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst
index 801ded7d0a5..f6c99bfc19e 100644
--- a/doc/sphinx-guides/source/container/index.rst
+++ b/doc/sphinx-guides/source/container/index.rst
@@ -13,8 +13,9 @@ Both approaches have pros and cons. These days (2022) containers are very often
 but there is an ever-growing trend toward running applications in the cloud using container technology.
 
 **NOTE:**
-**As the "Institute for Quantitative Social Sciences" at Harvard is running their installations in the classic
-deployment way, the container support is mostly created and maintained by the Dataverse community.**
+**As the Institute for Quantitative Social Sciences (IQSS) at Harvard is running their installations in the classic
+deployment way, the container support is mostly created and maintained by the Dataverse community on a best-effort
+basis.**
diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst
index f15a973544d..cbfafb419ab 100755
--- a/doc/sphinx-guides/source/index.rst
+++ b/doc/sphinx-guides/source/index.rst
@@ -18,8 +18,8 @@ These documentation guides are for the |version| version of Dataverse. To find g
    api/index
    installation/index
    developers/index
-   style/index
    container/index
+   style/index

From a3a70998b9fcacc1a96e8357d459cba489425785 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Fri, 26 Aug 2022 18:29:30 +0200
Subject: [PATCH 184/608] feat(ct-base): make image names configurable and
 rename

Add new Maven properties to choose a different Java base image and change the
name of the target base image when people customize it. Also changes the build
arg for the Java base image name.
With this, the image name changes to follow the same convention as the Java
base image.
---
 doc/sphinx-guides/source/container/base-image.rst | 4 +++-
 modules/container-base/pom.xml                    | 7 ++++---
 modules/container-base/src/main/docker/Dockerfile | 4 ++--
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst
index ac64323eeea..834381e6779 100644
--- a/doc/sphinx-guides/source/container/base-image.rst
+++ b/doc/sphinx-guides/source/container/base-image.rst
@@ -50,11 +50,13 @@ Some additional notes, using Maven parameters to change the build and use ...:
 
+- | ... a different image name and tag: add ``-Dbase.image=name:tag``.
+  | *Note:* default is ``gdcc/base:${target.java.version}-jre``
 - ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``.
 - | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...).
   | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin!
     (See also `Docker Hub search example `_)
-- ... a different Java Distribution: add ``-Ddocker.buildArg.BASE_IMAGE="name:tag"`` with precise reference to an
+- ... a different Java Distribution: add ``-Djava.image="name:tag"`` with precise reference to an
   image available from local or remote (e. g. Docker Hub).
diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml
index 015ebba598d..f8e97bb4349 100644
--- a/modules/container-base/pom.xml
+++ b/modules/container-base/pom.xml
@@ -39,6 +39,8 @@
+        <base.image>gdcc/base:${target.java.version}-jre</base.image>
+        <java.image>eclipse-temurin:${target.java.version}-jre</java.image>
@@ -85,8 +87,7 @@
                         <alias>base</alias>
-                        <name>%g/base:jdk${target.java.version}</name>
-                        <registry>${ct.registry}</registry>
+                        <name>${base.image}</name>
@@ -96,7 +97,7 @@
                         <dockerFile>Dockerfile</dockerFile>
                         <args>
-                            <BASE_IMAGE>eclipse-temurin:${target.java.version}-jre</BASE_IMAGE>
+                            <JAVA_IMAGE>${java.image}</JAVA_IMAGE>
                         </args>
                         <filter>@</filter>
diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile
index ba459607826..6fdc790a21a 100644
--- a/modules/container-base/src/main/docker/Dockerfile
+++ b/modules/container-base/src/main/docker/Dockerfile
@@ -20,8 +20,8 @@
 #
 
 # Make the Java base image and version configurable (useful for trying newer Java versions and flavors)
-ARG BASE_IMAGE="eclipse-temurin:11-jre"
-FROM $BASE_IMAGE
+ARG JAVA_IMAGE="eclipse-temurin:11-jre"
+FROM $JAVA_IMAGE
 
 # Default payara ports to expose

From 06d31fde25c3bfa812339c0afad94b7a83e92e59 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Fri, 26 Aug 2022 18:34:40 +0200
Subject: [PATCH 185/608] fix(ct-base): make container build use install not
 package goal

By switching to `mvn install` instead of `mvn package`, we allow the main
image carrying the application to declare a dependency on the container-base
module (to make sure it gets built alongside, as we might want to change the
Payara version!)

This commit also adds the Maven install plugin to the parent POM for
versioning plus to the container-base POM for having the target available.
(This is a necessary workaround for a Maven Docker Plugin shortcoming.)
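In practice the intended flow then looks like this (the override value in the
second command is exemplary):

    # build the base image and install the module into the local Maven repository
    mvn -Pct -f modules/container-base install
    # optionally with a customized image name (see previous commit):
    mvn -Pct -f modules/container-base install -Dbase.image=myfork/base:testing
    # an application image build can now resolve container-base like any Maven dependency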
---
 .../source/container/base-image.rst |  4 ++--
 modules/container-base/pom.xml      | 19 +++++++++++++++++++
 modules/dataverse-parent/pom.xml    |  6 ++++++
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst
index 834381e6779..585fe1184e7 100644
--- a/doc/sphinx-guides/source/container/base-image.rst
+++ b/doc/sphinx-guides/source/container/base-image.rst
@@ -42,11 +42,11 @@ Assuming you have `Docker `_, `Docker D
 
 Simply execute the Maven module's packaging target with the "container" profile activated. Either from the project's Git root:
 
-``mvn -Pct -f modules/container-base package``
+``mvn -Pct -f modules/container-base install``
 
 Or move to the module and execute:
 
-``cd modules/container-base && mvn -Pct package``
+``cd modules/container-base && mvn -Pct install``
 
 Some additional notes, using Maven parameters to change the build and use ...:
diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml
index f8e97bb4349..0e8f24a781b 100644
--- a/modules/container-base/pom.xml
+++ b/modules/container-base/pom.xml
@@ -108,6 +108,25 @@
+            <plugin>
+                <artifactId>maven-install-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>default-install</id>
+                        <phase>install</phase>
+                        <goals>
+                            <goal>install</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml
index eaa09b61bd7..411ce85b2fa 100644
--- a/modules/dataverse-parent/pom.xml
+++ b/modules/dataverse-parent/pom.xml
@@ -178,6 +178,7 @@
+        <maven-install-plugin.version>3.0.0-M1</maven-install-plugin.version>
         <maven-surefire-plugin.version>3.0.0-M5</maven-surefire-plugin.version>
         <maven-failsafe-plugin.version>3.0.0-M5</maven-failsafe-plugin.version>
         <maven-dependency-plugin.version>3.3.0</maven-dependency-plugin.version>
@@ -226,6 +227,11 @@
                 <artifactId>maven-dependency-plugin</artifactId>
                 <version>${maven-dependency-plugin.version}</version>
             </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-install-plugin</artifactId>
+                <version>${maven-install-plugin.version}</version>
+            </plugin>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-surefire-plugin</artifactId>

From 98ad9361843519b3f904ecc3df5d7b877802c30a Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Fri, 26 Aug 2022 18:37:28 +0200
Subject: [PATCH 186/608] fix(ct-base): flatten container-base POM

By using the flattening POM plugin, the installed POM will not carry
references to the dataverse-parent module. This reference is a) unnecessary
and b) troublesome because of the ${revision} hack. (And we do not provide it
as a dependency from Central/...)
---
 modules/container-base/.gitignore |  1 +
 modules/container-base/pom.xml    | 36 +++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)
 create mode 100644 modules/container-base/.gitignore

diff --git a/modules/container-base/.gitignore b/modules/container-base/.gitignore
new file mode 100644
index 00000000000..d75620abf70
--- /dev/null
+++ b/modules/container-base/.gitignore
@@ -0,0 +1 @@
+.flattened-pom.xml
diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml
index 0e8f24a781b..cee3989661a 100644
--- a/modules/container-base/pom.xml
+++ b/modules/container-base/pom.xml
@@ -108,6 +108,42 @@
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>flatten-maven-plugin</artifactId>
+                <version>1.2.7</version>
+                <configuration>
+                    <updatePomFile>true</updatePomFile>
+                    <flattenMode>oss</flattenMode>
+                    <pomElements>
+                        <distributionManagement>remove</distributionManagement>
+                        <repositories>remove</repositories>
+                    </pomElements>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>flatten</id>
+                        <phase>process-resources</phase>
+                        <goals>
+                            <goal>flatten</goal>
+                        </goals>
+                    </execution>
+                    <execution>
+                        <id>flatten.clean</id>
+                        <phase>clean</phase>
+                        <goals>
+                            <goal>clean</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>

From 64f84ea461d0fc8d1e4147b1bdcb8b86c2bafcd0 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Wed, 31 Aug 2022 01:56:51 +0200
Subject: [PATCH 201/608] style(ct-base): make up base image name from tag and
 add default

Defaulting to "develop" means that any image built during experimentation etc.
goes by a (local) develop tag. Removing the Java version from the tag makes it
easier to use and reflects the nature of it.
It aligns image builds with the release scheme of the actual application while
still allowing for experiments, and keeps different sources of truth for
released and develop code.
---
 modules/container-base/pom.xml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml
index 12eb3b137ff..67e2c2f9911 100644
--- a/modules/container-base/pom.xml
+++ b/modules/container-base/pom.xml
@@ -39,7 +39,8 @@
-        <base.image>gdcc/base:${target.java.version}-jre</base.image>
+        <base.image>gdcc/base:${base.image.tag}</base.image>
+        <base.image.tag>develop</base.image.tag>
         <java.image>eclipse-temurin:${target.java.version}-jre</java.image>

From 5a986af6cc7651fd43ec5a4207349dab17b6651e Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Wed, 31 Aug 2022 01:57:16 +0200
Subject: [PATCH 202/608] chore(deps): make container profile use Payara
 5.2022.3

---
 modules/dataverse-parent/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml
index 7a3b71fb68c..86b46817635 100644
--- a/modules/dataverse-parent/pom.xml
+++ b/modules/dataverse-parent/pom.xml
@@ -340,7 +340,7 @@
                     See also: https://github.com/IQSS/dataverse/issues/8048
                     See also: https://github.com/payara/Payara/issues/5368
                 -->
-                <payara.version>5.2022.2</payara.version>
+                <payara.version>5.2022.3</payara.version>

From 65f9d6356b8caca3ddd54e323c838e6b9749f3cc Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Wed, 31 Aug 2022 02:02:32 +0200
Subject: [PATCH 203/608] feat(ct-base): enable base image pushes for master
 and develop branch

- Make pushes to the develop or master branch release a container image to
  Docker Hub by default (can be changed / extended).
- Defaulting to the develop tag makes the workflow more reusable for dependent
  workflows based on pull requests.
- Move all multi-arch building to happen only on pushes, as it is done during
  the push/deploy phase only, and that phase needs credentials which are only
  available in the push context of the repo owner running the CI action.
- Remove the Java version matrix parameter, too - we stick with the default
  for releasing the images, as they are meant to be a somewhat reliable base.
  It's still open for experiments.
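Given registry credentials are configured, the same Maven goals the workflow
runs can be exercised locally (registry exemplary):

    # what CI effectively runs on pushes to develop (base.image.tag defaults to develop):
    mvn -f modules/container-base -Pct deploy -Ddocker.registry=docker.io
    # what CI effectively runs on pushes to master:
    mvn -f modules/container-base -Pct deploy -Dbase.image.tag=release -Ddocker.registry=docker.io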
---
 .github/workflows/container_base_push.yml | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml
index fc23b30d8ad..82c7a376ae0 100644
--- a/.github/workflows/container_base_push.yml
+++ b/.github/workflows/container_base_push.yml
@@ -5,16 +5,21 @@ on:
     push:
         branches:
             - 'develop'
+            - 'master'
         paths:
             - 'modules/container-base/**'
            - 'modules/dataverse-parent/pom.xml'
    pull_request:
        branches:
            - 'develop'
+            - 'master'
        paths:
            - 'modules/container-base/**'
            - 'modules/dataverse-parent/pom.xml'
 
+env:
+    IMAGE_TAG: develop
+    REGISTRY: docker.io
 
 jobs:
     build:
@@ -45,14 +50,21 @@ jobs:
                   key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
                   restore-keys: ${{ runner.os }}-m2
 
-            - name: Set up QEMU for multi-arch builds
-              uses: docker/setup-qemu-action@v2
-
-            - name: Build base container image
-              run: mvn -f modules/container-base -Pct package -Dtarget.java.version=${{ matrix.jdk }}
+            - name: Build base container image with local architecture
+              run: mvn -f modules/container-base -Pct package
 
             - if: ${{ github.event_name == 'push' }} # run only if this is a push - PRs have no access to secrets
               name: Log in to the Container registry
               uses: docker/login-action@v1
               with:
                   username: ${{ secrets.DOCKERHUB_USERNAME }}
-                  password: ${{ secrets.DOCKERHUB_TOKEN }}
\ No newline at end of file
+                  password: ${{ secrets.DOCKERHUB_TOKEN }}
+            - if: ${{ github.event_name == 'push' }} # run only if this is a push - multi-arch makes no sense with PR
+              name: Set up QEMU for multi-arch builds
+              uses: docker/setup-qemu-action@v2
+            - name: Re-set image tag based on branch
+              if: ${{ github.ref == 'refs/heads/master' }}
+              run: echo "IMAGE_TAG=release" >> "${GITHUB_ENV}"
+            - if: ${{ github.event_name == 'push' }} # run only if this is a push - tag push will only succeed in upstream
+              name: Deploy multi-arch base container image to Docker Hub
+              run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }} -Ddocker.registry=${{ env.REGISTRY }}

From 8f39ef2c6e564af53756895a0115e0d58f24d602 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Wed, 14 Sep 2022 16:42:29 +0200
Subject: [PATCH 204/608] style(ct-base): upgrade Dockerfile with heredocs
 #8932

Instead of using "&& \" style continuation of a RUN layer, newer Docker
versions (since 2021) allow usage of heredocs.
Also move some ARG to more suitable places --- .../container-base/src/main/docker/Dockerfile | 158 ++++++++++-------- 1 file changed, 85 insertions(+), 73 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index caec4ee6619..68b9da13c67 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -67,43 +67,47 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ ENABLE_JDWP=0 \ ENABLE_RELOAD=0 -ARG JATTACH_VERSION="v2.1" -ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" -ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" -ARG ASADMIN="${PAYARA_DIR}/bin/asadmin --user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE}" - ### PART 1: SYSTEM ### ARG UID=1000 ARG GID=1000 USER root WORKDIR / SHELL ["/bin/bash", "-euo", "pipefail", "-c"] -RUN true && \ +RUN <> /tmp/password-change-file.txt && \ - echo "AS_ADMIN_PASSWORD=${ADMIN_PASSWORD}" >> ${PASSWORD_FILE} && \ - asadmin --user=${ADMIN_USER} --passwordfile=/tmp/password-change-file.txt change-admin-password --domain_name=${DOMAIN_NAME} && \ + echo "AS_ADMIN_PASSWORD=" > /tmp/password-change-file.txt + echo "AS_ADMIN_NEWPASSWORD=${ADMIN_PASSWORD}" >> /tmp/password-change-file.txt + echo "AS_ADMIN_PASSWORD=${ADMIN_PASSWORD}" >> ${PASSWORD_FILE} + asadmin --user=${ADMIN_USER} --passwordfile=/tmp/password-change-file.txt change-admin-password --domain_name=${DOMAIN_NAME} # Start domain for configuration - ${ASADMIN} start-domain ${DOMAIN_NAME} && \ + ${ASADMIN} start-domain ${DOMAIN_NAME} # Allow access to admin with password only - ${ASADMIN} enable-secure-admin && \ + ${ASADMIN} enable-secure-admin + ### CONTAINER USAGE ENABLEMENT # List & delete memory settings from domain - for MEMORY_JVM_OPTION in $(${ASADMIN} list-jvm-options | grep "Xm[sx]\|Xss\|NewRatio"); \ - do \ - ${ASADMIN} delete-jvm-options $(echo $MEMORY_JVM_OPTION | sed -e 's/:/\\:/g'); \ - done && \ + for MEMORY_JVM_OPTION in $(${ASADMIN} list-jvm-options | grep "Xm[sx]\|Xss\|NewRatio"); + do + ${ASADMIN} delete-jvm-options $(echo $MEMORY_JVM_OPTION | sed -e 's/:/\\:/g'); + done # Tweak memory settings for containers - ${ASADMIN} create-jvm-options "-XX\:+UseContainerSupport" && \ - ${ASADMIN} create-jvm-options "-XX\:MaxRAMPercentage=\${ENV=MEM_MAX_RAM_PERCENTAGE}" && \ - ${ASADMIN} create-jvm-options "-Xss\${ENV=MEM_XSS}" && \ - ${ASADMIN} create-jvm-options "-XX\:MinHeapFreeRatio=\${ENV=MEM_MIN_HEAP_FREE_RATIO}" && \ - ${ASADMIN} create-jvm-options "-XX\:MaxHeapFreeRatio=\${ENV=MEM_MAX_HEAP_FREE_RATIO}" && \ - ${ASADMIN} create-jvm-options "-XX\:HeapDumpPath=\${ENV=DUMPS_DIR}" && \ + ${ASADMIN} create-jvm-options "-XX\:+UseContainerSupport" + ${ASADMIN} create-jvm-options "-XX\:MaxRAMPercentage=\${ENV=MEM_MAX_RAM_PERCENTAGE}" + ${ASADMIN} create-jvm-options "-Xss\${ENV=MEM_XSS}" + ${ASADMIN} create-jvm-options "-XX\:MinHeapFreeRatio=\${ENV=MEM_MIN_HEAP_FREE_RATIO}" + ${ASADMIN} create-jvm-options "-XX\:MaxHeapFreeRatio=\${ENV=MEM_MAX_HEAP_FREE_RATIO}" + ${ASADMIN} create-jvm-options "-XX\:HeapDumpPath=\${ENV=DUMPS_DIR}" # Set logging to console only for containers - ${ASADMIN} set-log-attributes com.sun.enterprise.server.logging.GFFileHandler.logtoFile=false && \ + ${ASADMIN} set-log-attributes com.sun.enterprise.server.logging.GFFileHandler.logtoFile=false \ + ### PRODUCTION READINESS - ${ASADMIN} create-jvm-options '-XX\:+UseG1GC' && \ - ${ASADMIN} create-jvm-options 
'-XX\:+UseStringDeduplication' && \ - ${ASADMIN} create-jvm-options '-XX\:+DisableExplicitGC' && \ - ${ASADMIN} create-jvm-options '-XX\:MaxGCPauseMillis=${ENV=MEM_MAX_GC_PAUSE_MILLIS}' && \ - ${ASADMIN} create-jvm-options '-XX\:MetaspaceSize=${ENV=MEM_METASPACE_SIZE}' && \ - ${ASADMIN} create-jvm-options '-XX\:MaxMetaspaceSize=${ENV=MEM_MAX_METASPACE_SIZE}' && \ - ${ASADMIN} create-jvm-options '-XX\:+IgnoreUnrecognizedVMOptions' && \ + ${ASADMIN} create-jvm-options '-XX\:+UseG1GC' + ${ASADMIN} create-jvm-options '-XX\:+UseStringDeduplication' + ${ASADMIN} create-jvm-options '-XX\:+DisableExplicitGC' + ${ASADMIN} create-jvm-options '-XX\:MaxGCPauseMillis=${ENV=MEM_MAX_GC_PAUSE_MILLIS}' + ${ASADMIN} create-jvm-options '-XX\:MetaspaceSize=${ENV=MEM_METASPACE_SIZE}' + ${ASADMIN} create-jvm-options '-XX\:MaxMetaspaceSize=${ENV=MEM_MAX_METASPACE_SIZE}' + ${ASADMIN} create-jvm-options '-XX\:+IgnoreUnrecognizedVMOptions' # Disable autodeploy and hot reload - ${ASADMIN} set configs.config.server-config.admin-service.das-config.dynamic-reload-enabled="false" && \ - ${ASADMIN} set configs.config.server-config.admin-service.das-config.autodeploy-enabled="false" && \ + ${ASADMIN} set configs.config.server-config.admin-service.das-config.dynamic-reload-enabled="false" + ${ASADMIN} set configs.config.server-config.admin-service.das-config.autodeploy-enabled="false" # Enlarge thread pools - ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-thread-pool-size="50" && \ - ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-queue-size="" && \ - ${ASADMIN} set default-config.thread-pools.thread-pool.thread-pool-1.max-thread-pool-size="250" && \ + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-thread-pool-size="50" + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-queue-size="" + ${ASADMIN} set default-config.thread-pools.thread-pool.thread-pool-1.max-thread-pool-size="250" # Enable file caching - ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" && \ - ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" && \ - ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" && \ - ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" && \ + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" # Disable the HTTPS listener (we are always fronting our appservers with a reverse proxy handling SSL) - ${ASADMIN} set configs.config.server-config.network-config.network-listeners.network-listener.http-listener-2.enabled="false" && \ - # Enlarge and tune EJB pools (cannot do this for server-config as set does not create new entries) \ - ${ASADMIN} set default-config.ejb-container.pool-resize-quantity="2" && \ - ${ASADMIN} set default-config.ejb-container.max-pool-size="128" && \ - ${ASADMIN} set default-config.ejb-container.steady-pool-size="10" && \ + ${ASADMIN} set 
configs.config.server-config.network-config.network-listeners.network-listener.http-listener-2.enabled="false" + # Enlarge and tune EJB pools (cannot do this for server-config as set does not create new entries) + ${ASADMIN} set default-config.ejb-container.pool-resize-quantity="2" + ${ASADMIN} set default-config.ejb-container.max-pool-size="128" + ${ASADMIN} set default-config.ejb-container.steady-pool-size="10" # Misc settings - ${ASADMIN} create-system-properties fish.payara.classloading.delegate="false" && \ - ${ASADMIN} create-system-properties jersey.config.client.readTimeout="300000" && \ - ${ASADMIN} create-system-properties jersey.config.client.connectTimeout="300000" && \ + ${ASADMIN} create-system-properties fish.payara.classloading.delegate="false" + ${ASADMIN} create-system-properties jersey.config.client.readTimeout="300000" + ${ASADMIN} create-system-properties jersey.config.client.connectTimeout="300000" \ + ### DATAVERSE APPLICATION SPECIFICS # Configure the MicroProfile directory config source to point to /secrets - ${ASADMIN} set-config-dir --directory="${SECRETS_DIR}" && \ + ${ASADMIN} set-config-dir --directory="${SECRETS_DIR}" # Make request timeouts configurable via MPCONFIG (default to 900 secs = 15 min) - ${ASADMIN} set 'server-config.network-config.protocols.protocol.http-listener-1.http.request-timeout-seconds=${MPCONFIG=dataverse.http.timeout:900}' && \ + ${ASADMIN} set 'server-config.network-config.protocols.protocol.http-listener-1.http.request-timeout-seconds=${MPCONFIG=dataverse.http.timeout:900}' # TODO: what of the below 3 items can be deleted for container usage? - ${ASADMIN} create-network-listener --protocol=http-listener-1 --listenerport=8009 --jkenabled=true jk-connector && \ - ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled=true && \ - ${ASADMIN} create-system-properties javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl && \ + ${ASADMIN} create-network-listener --protocol=http-listener-1 --listenerport=8009 --jkenabled=true jk-connector + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled=true + ${ASADMIN} create-system-properties javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl # Always disable phoning home... 
- ${ASADMIN} disable-phone-home && \ + ${ASADMIN} disable-phone-home \ + ### CLEANUP # Stop domain - ${ASADMIN} stop-domain "${DOMAIN_NAME}" && \ - # Disable JSP servlet dynamic reloads \ - sed -i 's#org.apache.jasper.servlet.JspServlet#org.apache.jasper.servlet.JspServlet\n \n development\n false\n \n \n genStrAsCharArray\n true\n #' "${DOMAIN_DIR}/config/default-web.xml" && \ + ${ASADMIN} stop-domain "${DOMAIN_NAME}" + # Disable JSP servlet dynamic reloads + sed -i 's#org.apache.jasper.servlet.JspServlet#org.apache.jasper.servlet.JspServlet\n \n development\n false\n \n \n genStrAsCharArray\n true\n #' "${DOMAIN_DIR}/config/default-web.xml" # Cleanup old CA certificates to avoid unnecessary log clutter during startup - ${SCRIPT_DIR}/removeExpiredCaCerts.sh && \ + ${SCRIPT_DIR}/removeExpiredCaCerts.sh # Delete generated files rm -rf \ "/tmp/password-change-file.txt" \ "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/osgi-cache" \ "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/logs" +EOF # Make docroot of Payara reside in higher level directory for easier targeting # Due to gdcc/dataverse-kubernetes#177: create the generated pathes so they are # writeable by us. TBR with gdcc/dataverse-kubernetes#178. -RUN rm -rf "${DOMAIN_DIR}"/docroot && \ - ln -s "${DOCROOT_DIR}" "${DOMAIN_DIR}"/docroot && \ +RUN < Date: Wed, 14 Sep 2022 21:18:28 +0200 Subject: [PATCH 205/608] feat,fix(ct-base): add extension point for background script #8932 By moving from tini to dumb-init, we can offer a new extension point: if an application image extending this base image provides an executable script at ${SCRIPT_DIR}/startInBackground.sh, it will be executed after the init scripts and in parallel to the application server. By adding ${SCRIPT_DIR} to $PATH, we can now also skip variable expansion, fixing a bug: formerly, the "exec" in entrypoint.sh and startInForeground.sh where not replacing the shell properly. The switch to dumb-init makes sure signals will be transferred also to any background processes! 
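A compact sketch of the resulting entrypoint pattern (illustrative only, not the shipped entrypoint.sh; dumb-init documents this shebang usage, and the commit message states that ${SCRIPT_DIR} is on $PATH)::

    #!/usr/bin/dumb-init /bin/bash
    # run init scripts first, as described above
    for SCRIPT in "${SCRIPT_DIR}"/init_* "${SCRIPT_DIR}"/init.d/*; do
        if [ -x "${SCRIPT}" ]; then "${SCRIPT}"; fi
    done
    # optional background helper, supervised by dumb-init (PID 1),
    # which also forwards signals to it
    if [ -x "${SCRIPT_DIR}/startInBackground.sh" ]; then
        startInBackground.sh &
    fi
    # exec replaces this shell instead of forking, fixing the bug mentioned above
    exec startInForeground.sh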
--- .../container-base/src/main/docker/Dockerfile | 10 +++++----- .../src/main/docker/scripts/entrypoint.sh | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 68b9da13c67..c56abb975e2 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -46,7 +46,7 @@ ENV PAYARA_DIR="${HOME_DIR}/appserver" \ ADMIN_PASSWORD="admin" \ DOMAIN_NAME="domain1" \ PAYARA_ARGS="" -ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ +ENV PATH="${PATH}:${PAYARA_DIR}/bin:${SCRIPT_DIR}" \ DOMAIN_DIR="${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}" \ DEPLOY_PROPS="" \ PREBOOT_COMMANDS="${CONFIG_DIR}/pre-boot-commands.asadmin" \ @@ -88,7 +88,7 @@ EOF ARG JATTACH_VERSION="v2.1" ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" -ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" +ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat dumb-init" # Installing the packages in an extra container layer for better caching RUN < Date: Wed, 14 Sep 2022 21:32:51 +0200 Subject: [PATCH 206/608] docs(ct-base): document startInBackground.sh #8932 --- .../source/container/base-image.rst | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 524ef8a7fbe..3f7b3b46c85 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -23,7 +23,8 @@ The base image provides: - `Payara Community Application Server `_ - CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide) - Linux tools for analysis, monitoring and so on -- `Jattach `_ +- `Jattach `__ (attach to running JVM) +- `dumb-init `__ (see :ref:`below ` for details) This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). @@ -246,6 +247,22 @@ its sources plus uncached scheduled nightly builds to make sure security updates Note: for the Github Action to be able to push to Docker Hub, two repository secrets (DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. +.. _base-entrypoint: + +Entry & Extension Points +++++++++++++++++++++++++ + +The entrypoint shell script provided by this base image will by default ensure to: + +- Run any scripts named ``${SCRIPT_DIR}/init_*`` or in ``${SCRIPT_DIR}/init.d/*`` directory for initialization + **before** the application server starts. +- Run an executable script ``${SCRIPT_DIR}/startInBackground.sh`` in the background - if present. +- Run the application server startup scripting in foreground (``${SCRIPT_DIR}/startInForeground.sh``). + +If you need to create some scripting that runs in parallel under supervision of `dumb-init `_, +e.g. to wait for the application to deploy before executing something, this is your point of extension: simply provide +the ``${SCRIPT_DIR}/startInBackground.sh`` executable script with your application image. 
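For example, an application image could ship a hypothetical script along these lines (the polled endpoint and the sleep interval are assumptions for illustration, not mandated by the base image)::

    #!/bin/bash
    # hypothetical ${SCRIPT_DIR}/startInBackground.sh in an application image
    until curl -sSf "http://localhost:8080/api/info/version" >/dev/null 2>&1; do
        sleep 5   # application not deployed yet
    done
    echo "Application is up - running one-off bootstrap task"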
+ Other Hints From f8bf73479708a0d1cfb6882db9a118e12d70d34d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 14 Sep 2022 21:50:06 +0200 Subject: [PATCH 207/608] ci(shellcheck,shellspec): split ShellCheck and ShellSpec To avoid unnecessary Shellspec runs for scripts that have no such tests, branch out the Shellcheck part of it into different workflow. Also make "bash" explicit as the container base image using an "unknown shebang" via dumb-init, but it's simply bash. --- .github/workflows/shellcheck.yml | 24 ++++++++++++++++++++++++ .github/workflows/shellspec.yml | 14 -------------- 2 files changed, 24 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/shellcheck.yml diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml new file mode 100644 index 00000000000..2d910f54127 --- /dev/null +++ b/.github/workflows/shellcheck.yml @@ -0,0 +1,24 @@ +name: "Shellcheck" +on: + push: + paths: + - conf/solr/** + - modules/container-base/** + pull_request: + paths: + - conf/solr/** + - modules/container-base/** +jobs: + shellcheck: + name: Shellcheck + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: shellcheck + uses: reviewdog/action-shellcheck@v1 + with: + github_token: ${{ secrets.github_token }} + reporter: github-pr-review # Change reporter. + fail_on_error: true + # Container base image uses dumb-init shebang, so nail to using bash + shellcheck_flags: "--shell=bash --external-sources" \ No newline at end of file diff --git a/.github/workflows/shellspec.yml b/.github/workflows/shellspec.yml index 2b127a7be5c..5c251cfc897 100644 --- a/.github/workflows/shellspec.yml +++ b/.github/workflows/shellspec.yml @@ -4,29 +4,15 @@ on: paths: - tests/shell/** - conf/solr/** - - modules/container-base/** # add more when more specs are written relying on data pull_request: paths: - tests/shell/** - conf/solr/** - - modules/container-base/** # add more when more specs are written relying on data env: SHELLSPEC_VERSION: 0.28.1 jobs: - shellcheck: - name: Shellcheck - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: shellcheck - uses: reviewdog/action-shellcheck@v1 - with: - github_token: ${{ secrets.github_token }} - reporter: github-pr-review # Change reporter. - fail_on_error: true - exclude: "./tests/shell/*" shellspec-ubuntu: name: "Ubuntu" runs-on: ubuntu-latest From 626b4951cfbf163895ce75e605b4daec455e0aae Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 20 Sep 2022 00:22:38 +0200 Subject: [PATCH 208/608] docs(ct-base): clarify support image tags #8932 Adding notes about the image tags produced by the community for reuse in the community. Document final tagging strategy, using the branch name (develop/main) instead of the Java version or sth. Reshape the automated builds and publishing part to be included in the supported tags and build instructions section to reduce text complexity and group matching parts together. 
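Once published, the two community tags described here can be consumed as usual (a sketch; Docker Hub being the default registry)::

    docker pull gdcc/base:develop    # unstable, tracks the develop branch
    docker pull gdcc/base:release    # latest stable release, tracks the main branch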
--- .../source/container/base-image.rst | 40 +++++++++++++------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 3f7b3b46c85..ea54ecbebd2 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -12,6 +12,17 @@ IQSS will not offer you support how to deploy or run it, please reach out to the You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) efforts. +Supported Image Tags +++++++++++++++++++++ + +This image is sourced within the main upstream code repository of the Dataverse software. Development and maintenance +happens there (again, by the community). Community supported image tags are based on the two most important branches: + +- ``develop`` representing the unstable state of affairs in Dataverse's development branch + (`Dockerfile `__) +- ``release`` representing the latest stable release in Dataverse's main branch + (`Dockerfile `__) + Image Contents @@ -51,8 +62,12 @@ Or move to the module and execute: Some additional notes, using Maven parameters to change the build and use ...: +- | ... a different tag only: add ``-Dbase.image.tag=tag``. + | *Note:* default is ``develop`` - | ... a different image name and tag: add ``-Dbase.image=name:tag``. - | *Note:* default is ``gdcc/base:${target.java.version}-jre`` + | *Note:* default is ``gdcc/base:${base.image.tag}`` +- ... a different image registry than *Docker Hub*: add ``-Ddocker.registry=registry.example.org`` (see also + `DMP docs on registries `__) - ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. - | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin! @@ -61,6 +76,17 @@ Some additional notes, using Maven parameters to change the build and use ...: image available from local or remote (e. g. Docker Hub). - ... a different UID/GID for the ``payara`` user/group: add ``-Dbase.image.uid=1234`` (or ``.gid``) +Automated Builds & Publishing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To make reusing most simple, the image is built with a Github Action within the IQSS repository and then pushed +to `Docker Hub gdcc/base repository `_. It is built and pushed on every edit to +its sources plus uncached scheduled nightly builds to make sure security updates are finding their way in. + +*Note:* For the Github Action to be able to push to Docker Hub, two repository secrets +(DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. + + Tunables ++++++++ @@ -234,18 +260,6 @@ The HTTPS listener (on port 8181) becomes deactivated during the build, as we wi application server and handle SSL/TLS termination at this point. Save the memory and some CPU cycles! -Publishing and Updates -++++++++++++++++++++++ - -This image is sourced within the main upstream code repository of the Dataverse software. Development and maintenance -happens there (again, by the community). - -To make reusing most simple, the image is built with a Github Action within the IQSS repository and then pushed -to `Docker Hub gdcc/base repository `_. It is built and pushed on every edit to -its sources plus uncached scheduled nightly builds to make sure security updates are finding their way in. 
- 
-Note: for the Github Action to be able to push to Docker Hub, two repository secrets
-(DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository.
 
 .. _base-entrypoint:
 

From 77592113f310d314d7de11b372a60cf3b4e08600 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Tue, 20 Sep 2022 00:27:06 +0200
Subject: [PATCH 209/608] style,docs(ct-base): small wording adjustments for some build options

---
 doc/sphinx-guides/source/container/base-image.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst
index ea54ecbebd2..3e83af23bfb 100644
--- a/doc/sphinx-guides/source/container/base-image.rst
+++ b/doc/sphinx-guides/source/container/base-image.rst
@@ -70,10 +70,10 @@ Some additional notes, using Maven parameters to change the build and use ...:
   `DMP docs on registries `__)
 - ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``.
 - | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...).
-  | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin!
+  | *Note:* must resolve to an available image tag ``A-jre`` of Eclipse Temurin!
     (See also `Docker Hub search example `_)
 - ... a different Java Distribution: add ``-Djava.image="name:tag"`` with precise reference to an
-  image available from local or remote (e. g. Docker Hub).
+  image available locally or remotely.
 - ... a different UID/GID for the ``payara`` user/group: add ``-Dbase.image.uid=1234`` (or ``.gid``)
 
 Automated Builds & Publishing

From 2141bcafae5fea8ac2414a0aecede81b988a7306 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Tue, 20 Sep 2022 00:48:16 +0200
Subject: [PATCH 210/608] docs(ct-base): add notes about multiarch builds #8932

Adding a description of the requirements for cross-platform builds as a
subsection of the build instructions seemed valuable.

---
 .../source/container/base-image.rst | 23 +++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst
index 3e83af23bfb..41d88c97e2d 100644
--- a/doc/sphinx-guides/source/container/base-image.rst
+++ b/doc/sphinx-guides/source/container/base-image.rst
@@ -37,8 +37,7 @@ The base image provides:
 - `Jattach `__ (attach to running JVM)
 - `dumb-init `__ (see :ref:`below ` for details)
 
-This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on:
-AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2).
+This image is created as a "multi-arch image", see :ref:`below `.
 
 It inherits being built on an Ubuntu environment from the upstream
 `base image of Eclipse Temurin `_.
@@ -86,6 +85,24 @@ its sources plus uncached scheduled nightly builds to make sure security updates
 *Note:* For the Github Action to be able to push to Docker Hub, two repository secrets
 (DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository.
 
+.. _base-multiarch:
+
+Processor Architecture and Multiarch
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on:
+AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2), by using Maven Docker Plugin's *BuildX* mode.
+
+Building the image via ``mvn -Pct package`` or ``mvn -Pct install`` as above will only build for the architecture of
+the Docker machine's CPU.
+
+Only ``mvn -Pct deploy`` will trigger building on all enabled architectures.
+Yet, to enable building with non-native code on your build machine, you will need to set up a cross-platform builder.
+
+On Linux, you should install `qemu-user-static `__ (preferably via
+your package management) on the host and run ``docker run --rm --privileged multiarch/qemu-user-static --reset -p yes``
+to enable that builder. The Docker plugin will set up everything else for you.
+
+
 Tunables
@@ -290,8 +307,6 @@ you can read about those in a few places like https://developers.redhat.com/arti
 https://www.eclipse.org/openj9/docs/xxusecontainersupport, etc. The other memory defaults are inspired
 from `run-java-sh recommendations`_.
 
-*Note: the build process used the newer ``buildx`` feature of Docker to provide multiarch images.*
-
 .. _Pre/postboot script docs: https://docs.payara.fish/community/docs/Technical%20Documentation/Payara%20Micro%20Documentation/Payara%20Micro%20Configuration%20and%20Management/Micro%20Management/Asadmin%20Commands/Pre%20and%20Post%20Boot%20Commands.html

From 8463424a725a8459c3e2c623f7e626000a164933 Mon Sep 17 00:00:00 2001
From: Jan Range
Date: Tue, 20 Sep 2022 09:51:53 +0200
Subject: [PATCH 211/608] Added 'multiple' to metadatablock JSON export

---
 .../java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
index e088122419d..1b7a52b1ea5 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
@@ -550,6 +550,7 @@ public static JsonObjectBuilder json(DatasetFieldType fld) {
         fieldsBld.add("type", fld.getFieldType().toString());
         fieldsBld.add("watermark", fld.getWatermark());
         fieldsBld.add("description", fld.getDescription());
+        fieldsBld.add("multiple", fld.isAllowMultiples());
         if (!fld.getChildDatasetFieldTypes().isEmpty()) {
             JsonObjectBuilder subFieldsBld = jsonObjectBuilder();
             for (DatasetFieldType subFld : fld.getChildDatasetFieldTypes()) {

From 276b3b5159471bd44cff99bfb1b9e6b279634b4a Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Tue, 20 Sep 2022 10:49:19 +0200
Subject: [PATCH 212/608] feat(ct-base): add wait-for script to image

Many scripts shipped with an app image might rely on the availability of an
external service, API or simply the database or search index. Adding a
standard script here to make it easier to wait for their availability.

---
 doc/sphinx-guides/source/container/base-image.rst |  1 +
 modules/container-base/src/main/docker/Dockerfile | 11 +++++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst
index 41d88c97e2d..197f4175538 100644
--- a/doc/sphinx-guides/source/container/base-image.rst
+++ b/doc/sphinx-guides/source/container/base-image.rst
@@ -35,6 +35,7 @@ The base image provides:
 - CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide)
 - Linux tools for analysis, monitoring and so on
 - `Jattach `__ (attach to running JVM)
+- `wait-for `__ (tool to "wait for" a service to be available)
 - `dumb-init `__ (see :ref:`below ` for details)
 
 This image is created as a "multi-arch image", see :ref:`below `.
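The bundled script can then be used along these lines (a sketch; the host name, port, and timeout are illustrative, and the flags follow the upstream wait-for conventions)::

    # block for up to 120 seconds until PostgreSQL answers, then run the command
    wait-for postgres:5432 -t 120 -- echo "database is up"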
diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile
index c56abb975e2..cafeb2ffb59 100644
--- a/modules/container-base/src/main/docker/Dockerfile
+++ b/modules/container-base/src/main/docker/Dockerfile
@@ -88,6 +88,8 @@ EOF
 ARG JATTACH_VERSION="v2.1"
 ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e"
+ARG WAIT_FOR_VERSION="v2.2.3"
+ARG WAIT_FOR_CHECKSUM="70271181be69cd2c7265b2746f97fccfd7e8aa1059894138a775369c23589ff4"
 ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat dumb-init"
 
 # Installing the packages in an extra container layer for better caching
@@ -95,12 +97,17 @@ RUN < Date: Tue, 20 Sep 2022 18:22:01 +0200
Subject: [PATCH 213/608] feat(upload): make upload file storage path configurable #6656

As outlined in IQSS#6656, files will be stored in `domaindir/generated/jsp/dataverse`
during upload before being moved to our temporary ingest file space at
`$dataverse.files.directory/temp`.

With this commit, we make it possible to configure a different location for this
kind of generated temporary file by using MPCONFIG variable substitution inside of
glassfish-web.xml. Also sorts the content of glassfish-web.xml into the order
specified by the XSD.

Documentation of the setting is provided.

---
 doc/sphinx-guides/source/installation/config.rst | 11 +++++++++++
 src/main/webapp/WEB-INF/glassfish-web.xml | 8 +++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index 17d88c8ea31..72edaa0b456 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -1406,6 +1406,17 @@ dataverse.files.directory
 This is how you configure the path Dataverse uses for temporary files. (File store specific dataverse.files.\.directory options set the permanent data storage locations.)
 
+dataverse.files.uploads
++++++++++++++++++++++++
+
+Configure a folder to store the incoming file stream during uploads (before transferring to `${dataverse.files.directory}/temp`).
+You can use an absolute path or a relative one, which is resolved against the application server domain directory.
+
+Defaults to ``./uploads``, which resolves to ``/usr/local/payara5/glassfish/domains/domain1/uploads`` in a default
+installation.
+
+Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_UPLOADS``.
+
 dataverse.auth.password-reset-timeout-in-minutes
 ++++++++++++++++++++++++++++++++++++++++++++++++
 
diff --git a/src/main/webapp/WEB-INF/glassfish-web.xml b/src/main/webapp/WEB-INF/glassfish-web.xml
index ecd3ba15c40..e56d7013abf 100644
--- a/src/main/webapp/WEB-INF/glassfish-web.xml
+++ b/src/main/webapp/WEB-INF/glassfish-web.xml
@@ -8,9 +8,15 @@ Keep a copy of the generated servlet class' java code.
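<!-- An illustrative sketch of the MPCONFIG substitution the commit message describes; -->
<!-- the property name below is an assumption, not part of this patch:                 -->
<!-- <property name="tempdir" value="${MPCONFIG=dataverse.files.uploads:./uploads}"/>  -->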
+ + - + + From 43c0681e568856f330f183b4585c2d4535cc2a99 Mon Sep 17 00:00:00 2001 From: cstr Date: Wed, 21 Sep 2022 17:55:37 +0800 Subject: [PATCH 214/608] Update OpenAireExportUtil.java --- .../iq/dataverse/export/openaire/OpenAireExportUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index ffce432ce3b..87604cdc988 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -964,7 +964,7 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe relatedIdentifierTypeMap.put("URL".toLowerCase(), "URL"); relatedIdentifierTypeMap.put("URN".toLowerCase(), "URN"); relatedIdentifierTypeMap.put("WOS".toLowerCase(), "WOS"); - relatedIdentifierTypeMap.put("CSTR".toLowerCase(), "CSTR"); + relatedIdentifierTypeMap.put("CSTR".toLowerCase(), "cstr"); } for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { From c715bb88e979f2fa21dd1b27fa9cf7b3108ee60f Mon Sep 17 00:00:00 2001 From: cstr Date: Wed, 21 Sep 2022 18:06:38 +0800 Subject: [PATCH 215/608] Update OpenAireExportUtil.java --- .../harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 87604cdc988..49fe203b96d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -964,7 +964,6 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe relatedIdentifierTypeMap.put("URL".toLowerCase(), "URL"); relatedIdentifierTypeMap.put("URN".toLowerCase(), "URN"); relatedIdentifierTypeMap.put("WOS".toLowerCase(), "WOS"); - relatedIdentifierTypeMap.put("CSTR".toLowerCase(), "cstr"); } for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { From 0959c84b0c0f35ef7602c5e48e0943aa89711982 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 22 Sep 2022 14:17:58 +0200 Subject: [PATCH 216/608] chore(deps): remove Payara version from Maven ct profile With the merge of #8949 the custom version is no longer necessary. 
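If a pinned Payara build is still needed for a one-off container build, it can now be supplied on the command line instead (a sketch, reusing the ``-Dpayara.version`` switch documented in the base image guide above)::

    mvn -f modules/container-base -Pct package -Dpayara.version=5.2022.3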
--- modules/dataverse-parent/pom.xml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 9326ba71263..ce4dfb56257 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -334,13 +334,7 @@ ct - - 5.2022.3 + From 5c79de8fb1ea1c50455595819864262f8f891e74 Mon Sep 17 00:00:00 2001 From: cstr Date: Fri, 23 Sep 2022 11:29:23 +0800 Subject: [PATCH 217/608] Update AdminIT.java resolve AdminIT junit.framework.AssertionFailedError: expected:<322> but was:<323> --- src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index cf06fd9937b..91f78ca6238 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -762,7 +762,7 @@ public void testLoadMetadataBlock_NoErrorPath() { assertEquals(1, data.size()); List> addedElements = data.get("added"); //Note -test depends on the number of elements in the production citation block, so any changes to the # of elements there can break this test - assertEquals(322, addedElements.size()); + assertEquals(323, addedElements.size()); Map statistics = new HashMap<>(); for (Map unit : addedElements) { From 4bfca4a243ae2795329a8df8ffd3a2f3f1aa36d6 Mon Sep 17 00:00:00 2001 From: cstr Date: Mon, 26 Sep 2022 09:16:25 +0800 Subject: [PATCH 218/608] Update AdminIT.java I should find the reason for the failure, please rebuild --- src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 91f78ca6238..bcee8d18e17 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -778,7 +778,7 @@ public void testLoadMetadataBlock_NoErrorPath() { assertEquals(3, statistics.size()); assertEquals(1, (int) statistics.get("MetadataBlock")); assertEquals(78, (int) statistics.get("DatasetField")); - assertEquals(243, (int) statistics.get("Controlled Vocabulary")); + assertEquals(244, (int) statistics.get("Controlled Vocabulary")); } @Test From b7ea43047fce4491284b12b1ae7403fc248b6f05 Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 26 Sep 2022 16:02:16 -0400 Subject: [PATCH 219/608] license internationalization --- .../edu/harvard/iq/dataverse/dataset/DatasetUtil.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 16ea09919af..2db20377169 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -25,12 +25,8 @@ import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; +import java.util.*; import java.util.logging.Logger; -import java.util.Base64; -import java.util.HashMap; -import java.util.Map; import javax.imageio.ImageIO; import org.apache.commons.io.IOUtils; import static edu.harvard.iq.dataverse.dataaccess.DataAccess.getStorageIO; @@ -577,8 +573,6 @@ public static String getLicenseIcon(DatasetVersion dsv) { public static 
String getLicenseDescription(DatasetVersion dsv) { License license = DatasetUtil.getLicense(dsv); - return license != null ? license.getShortDescription() : BundleUtil.getStringFromBundle("license.custom.description"); - License license = dsv.getTermsOfUseAndAccess().getLicense(); if (license != null) { return getLocalizedLicenseDescription(license.getName()) ; From 4af2c05a80949974989baf6f5d61348a4d31b3de Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 27 Sep 2022 12:33:05 -0400 Subject: [PATCH 220/608] support signedURL in findUser --- .../harvard/iq/dataverse/api/AbstractApiBean.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 3b22fb83836..22d1f668949 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -363,7 +363,7 @@ protected AuthenticatedUser findUserByApiToken( String apiKey ) { protected User findUserOrDie() throws WrappedResponse { final String requestApiKey = getRequestApiKey(); final String requestWFKey = getRequestWorkflowInvocationID(); - if (requestApiKey == null && requestWFKey == null) { + if (requestApiKey == null && requestWFKey == null && getRequestParameter("token")==null) { return GuestUser.get(); } PrivateUrlUser privateUrlUser = privateUrlSvc.getPrivateUrlUserFromToken(requestApiKey); @@ -437,19 +437,19 @@ private AuthenticatedUser getAuthenticatedUserFromSignedUrl() { // that as a secret in validation the signedURL. If the signature can't be // validating with their key, the user (or their API key) has been changed and // we reject the request. - //ToDo - add null checks/ verify that calling methods catch things. + // ToDo - add null checks/ verify that calling methods catch things. String user = httpRequest.getParameter("user"); AuthenticatedUser targetUser = authSvc.getAuthenticatedUser(user); - String key = System.getProperty(SystemConfig.API_SIGNING_SECRET,"") + authSvc.findApiTokenByUser(targetUser).getTokenString(); - String signedUrl = httpRequest.getRequestURL().toString()+"?"+httpRequest.getQueryString(); + String key = System.getProperty(SystemConfig.API_SIGNING_SECRET, "") + + authSvc.findApiTokenByUser(targetUser).getTokenString(); + String signedUrl = httpRequest.getRequestURL().toString() + "?" 
+ httpRequest.getQueryString(); String method = httpRequest.getMethod(); - String queryString = httpRequest.getQueryString(); boolean validated = UrlSignerUtil.isValidUrl(signedUrl, user, method, key); - if (validated){ + if (validated) { authUser = targetUser; } return authUser; - } + } protected Dataverse findDataverseOrDie( String dvIdtf ) throws WrappedResponse { Dataverse dv = findDataverse(dvIdtf); From 0cd3e4a5212537b19adf5b040882f59277b5ca2e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 27 Sep 2022 12:42:55 -0400 Subject: [PATCH 221/608] update/fix requestSignedURL - use the user if supplied - require superuser --- .../edu/harvard/iq/dataverse/api/Admin.java | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 7a145143306..c9ce12fec98 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2249,29 +2249,34 @@ public Response getBannerMessages(@PathParam("id") Long id) throws WrappedRespon @Consumes("application/json") @Path("/requestSignedUrl") public Response getSignedUrl(JsonObject urlInfo) throws WrappedResponse { - AuthenticatedUser superuser = authSvc.getAdminUser(); + AuthenticatedUser superuser = findAuthenticatedUserOrDie(); - if (superuser == null) { + if (superuser == null || !superuser.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Requesting signed URLs is restricted to superusers."); } String userId = urlInfo.getString("user"); String key=null; - if(userId!=null) { - AuthenticatedUser user = authSvc.getAuthenticatedUser(userId); - if(user!=null) { - ApiToken apiToken = authSvc.findApiTokenByUser(user); - if(apiToken!=null && !apiToken.isExpired() && ! apiToken.isDisabled()) { - key = apiToken.getTokenString(); + if (userId != null) { + AuthenticatedUser user = authSvc.getAuthenticatedUser(userId); + // If a user param was sent, we sign the URL for them, otherwise on behalf of + // the superuser who made this api call + if (user != null) { + ApiToken apiToken = authSvc.findApiTokenByUser(user); + if (apiToken != null && !apiToken.isExpired() && !apiToken.isDisabled()) { + key = apiToken.getTokenString(); + } + } else { + userId = superuser.getUserIdentifier(); + // We ~know this exists - the superuser just used it and it was unexpired/not + // disabled. (ToDo - if we want this to work with workflow tokens (or as a + // signed URL), we should do more checking as for the user above)) + key = authSvc.findApiTokenByUser(superuser).getTokenString(); } - } else { - userId=superuser.getIdentifier(); - //We ~know this exists - the superuser just used it and it was unexpired/not disabled. 
(ToDo - if we want this to work with workflow tokens (or as a signed URL, we should do more checking as for the user above)) - } - key = System.getProperty(SystemConfig.API_SIGNING_SECRET,"") + authSvc.findApiTokenByUser(superuser).getTokenString(); - } - if(key==null) { - return error(Response.Status.CONFLICT, "Do not have a valid user with apiToken"); + if (key == null) { + return error(Response.Status.CONFLICT, "Do not have a valid user with apiToken"); + } + key = System.getProperty(SystemConfig.API_SIGNING_SECRET, "") + key; } String baseUrl = urlInfo.getString("url"); From 1095f96253a39c3438dceb29a66be49ad803480d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 27 Sep 2022 12:43:22 -0400 Subject: [PATCH 222/608] reduce duplication --- .../harvard/iq/dataverse/externaltools/ExternalTool.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java index bda9ebad063..97838b45cc5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java @@ -118,14 +118,7 @@ public ExternalTool() { } public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType) { - this.displayName = displayName; - this.toolName = toolName; - this.description = description; - this.externalToolTypes = externalToolTypes; - this.scope = scope; - this.toolUrl = toolUrl; - this.toolParameters = toolParameters; - this.contentType = contentType; + this(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, null); } public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType, String allowedApiCalls) { From 01fb249eb8dda62da4b7232ffacd0449dd1888d6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 27 Sep 2022 12:45:32 -0400 Subject: [PATCH 223/608] cleanup, add hasToken call --- .../iq/dataverse/util/UrlSignerUtil.java | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index b11334520e6..ee3dd127196 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.util; +import java.net.MalformedURLException; import java.net.URL; import java.nio.charset.Charset; import java.util.List; @@ -34,7 +35,7 @@ public class UrlSignerUtil { * @return - the signed URL */ public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { - StringBuilder signedUrl = new StringBuilder(baseUrl); + StringBuilder signedUrlBuilder = new StringBuilder(baseUrl); boolean firstParam = true; if (baseUrl.contains("?")) { @@ -44,26 +45,26 @@ public static String signUrl(String baseUrl, Integer timeout, String user, Strin LocalDateTime validTime = LocalDateTime.now(); validTime = validTime.plusMinutes(timeout); validTime.toString(); - signedUrl.append(firstParam ? "?" : "&").append("until=").append(validTime); + signedUrlBuilder.append(firstParam ? "?" 
: "&").append("until=").append(validTime); firstParam = false; } if (user != null) { - signedUrl.append(firstParam ? "?" : "&").append("user=").append(user); + signedUrlBuilder.append(firstParam ? "?" : "&").append("user=").append(user); firstParam = false; } if (method != null) { - signedUrl.append(firstParam ? "?" : "&").append("method=").append(method); + signedUrlBuilder.append(firstParam ? "?" : "&").append("method=").append(method); firstParam=false; } - signedUrl.append(firstParam ? "?" : "&").append("token="); - logger.fine("String to sign: " + signedUrl.toString() + ""); - signedUrl.append(DigestUtils.sha512Hex(signedUrl.toString() + key)); - logger.fine("Generated Signed URL: " + signedUrl.toString()); + signedUrlBuilder.append(firstParam ? "?" : "&").append("token="); + logger.fine("String to sign: " + signedUrlBuilder.toString() + ""); + String signedUrl = signedUrlBuilder.toString(); + signedUrl= signedUrl + (DigestUtils.sha512Hex(signedUrl + key)); if (logger.isLoggable(Level.FINE)) { logger.fine( - "URL signature is " + (isValidUrl(signedUrl.toString(), user, method, key) ? "valid" : "invalid")); + "URL signature is " + (isValidUrl(signedUrl, user, method, key) ? "valid" : "invalid")); } - return signedUrl.toString(); + return signedUrl; } /** @@ -148,4 +149,18 @@ public static boolean isValidUrl(String signedUrl, String user, String method, S return valid; } + public static boolean hasToken(String urlString) { + try { + URL url = new URL(urlString); + List params = URLEncodedUtils.parse(url.getQuery(), Charset.forName("UTF-8")); + for (NameValuePair nvp : params) { + if (nvp.getName().equals("token")) { + return true; + } + } + } catch (MalformedURLException mue) { + logger.fine("Bad url string: " + urlString); + } + return false; + } } From 12d98fad38d1bbec0f523aefc769f4459e83e488 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 27 Sep 2022 12:46:28 -0400 Subject: [PATCH 224/608] remove ; in jvm option name --- src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index e0d016c6137..13d12ce79ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -125,7 +125,7 @@ public class SystemConfig { // A secret used in signing URLs - individual urls are signed using this and the // intended user's apiKey, creating an aggregate key that is unique to the user // but not known to the user (as their apiKey is) - public final static String API_SIGNING_SECRET = "dataverse.api-signing-secret;"; + public final static String API_SIGNING_SECRET = "dataverse.api-signing-secret"; public String getVersion() { return getVersion(false); From 2287438382c242e50f9e91536eae9e7c534fbf25 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 27 Sep 2022 12:48:17 -0400 Subject: [PATCH 225/608] remove unnecessary 'apis' json object from manifest structure --- .../externaltools/ExternalToolServiceBean.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java index 432aa26714d..a65ad2427ba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java @@ -169,8 +169,8 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { String toolUrl = getRequiredTopLevelField(jsonObject, TOOL_URL); JsonObject toolParametersObj = jsonObject.getJsonObject(TOOL_PARAMETERS); JsonArray queryParams = toolParametersObj.getJsonArray("queryParameters"); - JsonObject allowedApiCallsObj = jsonObject.getJsonObject(ALLOWED_API_CALLS); - JsonArray apis = allowedApiCallsObj.getJsonArray("apis"); + JsonArray allowedApiCallsArray = jsonObject.getJsonArray(ALLOWED_API_CALLS); + boolean allRequiredReservedWordsFound = false; if (scope.equals(Scope.FILE)) { List requiredReservedWordCandidates = new ArrayList<>(); @@ -223,9 +223,11 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { } String toolParameters = toolParametersObj.toString(); - String allowedApiCalls = allowedApiCallsObj.toString(); + String allowedApiCalls = null; + if(allowedApiCallsArray !=null) { + allowedApiCalls = allowedApiCallsArray.toString(); + } -// return new ExternalTool(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType); return new ExternalTool(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, allowedApiCalls); } From 1f82b191a546b9a01f5dffee8773ba6313b06730 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 27 Sep 2022 12:51:17 -0400 Subject: [PATCH 226/608] refactor, remove extra user variable --- .../iq/dataverse/ConfigureFragmentBean.java | 1 - .../iq/dataverse/api/ExternalTools.java | 1 - .../externaltools/ExternalToolHandler.java | 230 +++++++++++------- .../iq/dataverse/util/URLTokenUtil.java | 23 +- 4 files changed, 143 insertions(+), 112 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ConfigureFragmentBean.java b/src/main/java/edu/harvard/iq/dataverse/ConfigureFragmentBean.java index 58752af8520..d51a73fd2dc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ConfigureFragmentBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ConfigureFragmentBean.java @@ -106,7 +106,6 @@ public void generateApiToken() { ApiToken apiToken = new ApiToken(); User user = session.getUser(); if (user instanceof AuthenticatedUser) { - toolHandler.setUser(((AuthenticatedUser) user).getUserIdentifier()); apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); if (apiToken == null) { //No un-expired token diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ExternalTools.java b/src/main/java/edu/harvard/iq/dataverse/api/ExternalTools.java index aef30bfb0c2..e53b54482b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/ExternalTools.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/ExternalTools.java @@ -1,7 +1,6 @@ package edu.harvard.iq.dataverse.api; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; -import static edu.harvard.iq.dataverse.api.AbstractApiBean.error; import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; import java.util.logging.Logger; diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index be99e78c4d6..085c2a7b3bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -16,18 +16,24 @@ import 
java.net.http.HttpClient; import java.net.http.HttpRequest; import java.net.http.HttpResponse; -import java.util.ArrayList; -import java.util.List; +import java.util.Map.Entry; import java.util.logging.Level; import java.util.logging.Logger; import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonArrayBuilder; +import javax.json.JsonNumber; import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; import javax.json.JsonString; +import javax.json.JsonValue; import javax.ws.rs.HttpMethod; +import org.apache.commons.codec.binary.StringUtils; + +import com.github.scribejava.core.java8.Base64; + /** * Handles an operation on a specific file. Requires a file id in order to be * instantiated. Applies logic based on an {@link ExternalTool} specification, @@ -35,29 +41,21 @@ */ public class ExternalToolHandler extends URLTokenUtil { - /** - * @param user the user to set - */ - public void setUser(String user) { - this.user = user; - } - private final ExternalTool externalTool; private String requestMethod; private String toolContext; - private String user; - /** * File level tool * * @param externalTool The database entity. - * @param dataFile Required. - * @param apiToken The apiToken can be null because "explore" tools can be - * used anonymously. + * @param dataFile Required. + * @param apiToken The apiToken can be null because "explore" tools can be + * used anonymously. */ - public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToken apiToken, FileMetadata fileMetadata, String localeCode) { + public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToken apiToken, + FileMetadata fileMetadata, String localeCode) { super(dataFile, apiToken, fileMetadata, localeCode); this.externalTool = externalTool; toolContext = externalTool.getToolUrl(); @@ -67,125 +65,169 @@ public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToke * Dataset level tool * * @param externalTool The database entity. - * @param dataset Required. - * @param apiToken The apiToken can be null because "explore" tools can be - * used anonymously. + * @param dataset Required. + * @param apiToken The apiToken can be null because "explore" tools can be + * used anonymously. */ public ExternalToolHandler(ExternalTool externalTool, Dataset dataset, ApiToken apiToken, String localeCode) { super(dataset, apiToken, localeCode); this.externalTool = externalTool; } - // TODO: rename to handleRequest() to someday handle sending headers as well as query parameters. + // TODO: rename to handleRequest() to someday handle sending headers as well as + // query parameters. public String handleRequest() { return handleRequest(false); } - - // TODO: rename to handleRequest() to someday handle sending headers as well as query parameters. + + // TODO: rename to handleRequest() to someday handle sending headers as well as + // query parameters. public String handleRequest(boolean preview) { - JsonObject obj = JsonUtil.getJsonObject(externalTool.getToolParameters()); - JsonString method = obj.getJsonString("httpMethod"); - requestMethod = method!=null?method.getString():HttpMethod.GET; - JsonArray queryParams = obj.getJsonArray("queryParameters"); - List params = new ArrayList<>(); + JsonObject toolParameters = JsonUtil.getJsonObject(externalTool.getToolParameters()); + JsonString method = toolParameters.getJsonString("httpMethod"); + requestMethod = method != null ? 
method.getString() : HttpMethod.GET; + JsonObject params = getParams(toolParameters); + logger.fine("Found params: " + JsonUtil.prettyPrint(params)); if (requestMethod.equals(HttpMethod.GET)) { - if (queryParams == null || queryParams.isEmpty()) { - return ""; + String paramsString = ""; + if (externalTool.getAllowedApiCalls() == null) { + // Legacy, using apiKey + logger.fine("Legacy Case"); + + for (Entry entry : params.entrySet()) { + paramsString = paramsString + (paramsString.isEmpty() ? "?" : "&") + entry.getKey() + "="; + JsonValue val = entry.getValue(); + if (val.getValueType().equals(JsonValue.ValueType.NUMBER)) { + paramsString += ((JsonNumber) val).intValue(); + } else { + paramsString += ((JsonString) val).getString(); + } + } + } else { + //Send a signed callback to get params and signedURLs + String callback = null; + switch (externalTool.getScope()) { + case DATASET: + callback=SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + + dataset.getId() + "/versions/:latest/toolparams/" + externalTool.getId(); + case FILE: + callback= SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/files/" + + dataFile.getId() + "/metadata/" + fileMetadata.getId() + "/toolparams/" + + externalTool.getId(); + } + if (apiToken != null) { + callback = UrlSignerUtil.signUrl(callback, 5, apiToken.getAuthenticatedUser().getUserIdentifier(), HttpMethod.GET, + System.getProperty(SystemConfig.API_SIGNING_SECRET, "") + apiToken.getTokenString()); + } + paramsString= "?callback=" + Base64.getEncoder().encodeToString(StringUtils.getBytesUtf8(callback)); + if (getLocaleCode() != null) { + paramsString += "&locale=" + getLocaleCode(); + } + } + if (preview) { + paramsString += "&preview=true"; + } + logger.fine("GET return is: " + paramsString); + return paramsString; + + } else { + // ToDo - if the allowedApiCalls() are defined, could/should we send them to + // tools using GET as well? + + if (requestMethod.equals(HttpMethod.POST)) { + String body = JsonUtil.prettyPrint(createPostBody(params).build()); + try { + logger.info("POST Body: " + body); + return postFormData(body); + } catch (IOException | InterruptedException ex) { + Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); + } } + } + return null; + } + + public JsonObject getParams(JsonObject toolParameters) { + JsonArray queryParams = toolParameters.getJsonArray("queryParameters"); + + // ToDo return json and print later + JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); + if (!(queryParams == null) && !queryParams.isEmpty()) { queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { queryParam.keySet().forEach((key) -> { String value = queryParam.getString(key); - String param = getQueryParam(key, value); - if (param != null && !param.isEmpty()) { - params.add(param); + JsonValue param = getParam(value); + if (param != null) { + paramsBuilder.add(key, param); } }); }); } + return paramsBuilder.build(); + } - //ToDo - if the allowedApiCalls() are defined, could/should we send them to tools using GET as well? 
+ public JsonObjectBuilder createPostBody(JsonObject params) { + JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); + + bodyBuilder.add("queryParameters", params); + + JsonArray apiArray = JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()); + JsonArrayBuilder apisBuilder = Json.createArrayBuilder(); - if (requestMethod.equals(HttpMethod.POST)) { - JsonArrayBuilder jsonArrayBuilder = Json.createArrayBuilder(); - try { - queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { - queryParam.keySet().forEach((key) -> { - String value = queryParam.getString(key); - String param = getPostBodyParam(key, value); - if (param != null && !param.isEmpty()) { - params.add(param); - } - }); - }); - String addVal = String.join(",", params); - String kvp = "{\"queryParameters\":{" + addVal; - - String allowedApis; - - JsonObject objApis = JsonUtil.getJsonObject(externalTool.getAllowedApiCalls()); - - JsonArray apis = objApis.getJsonArray("apis"); - apis.getValuesAs(JsonObject.class).forEach(((apiObj) -> { - String name = apiObj.getJsonString("name").getString(); - String httpmethod = apiObj.getJsonString("method").getString(); - int timeout = apiObj.getInt("timeOut"); - String urlTemplate = apiObj.getJsonString("urlTemplate").getString(); - logger.fine("URL Template: " + urlTemplate); - String apiPath = replaceTokensWithValues(urlTemplate); - logger.fine("URL WithTokens: " + apiPath); - String url = UrlSignerUtil.signUrl(apiPath, timeout, user, httpmethod, System.getProperty(SystemConfig.API_SIGNING_SECRET, "") + getApiToken().getTokenString()); - logger.fine("Signed URL: " + url); - jsonArrayBuilder.add(Json.createObjectBuilder().add("name", name).add("httpMethod", httpmethod) - .add("signedUrl", url).add("timeOut", timeout)); - })); - JsonArray allowedApiCallsArray = jsonArrayBuilder.build(); - allowedApis = "\"signedUrls\":" + JsonUtil.prettyPrint(allowedApiCallsArray) + "}"; - logger.fine("Sending these signed URLS: " + allowedApis); - String body = kvp + "}," + allowedApis; - logger.info(body); - return postFormData(body); - } catch (IOException | InterruptedException ex) { - Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); + apiArray.getValuesAs(JsonObject.class).forEach(((apiObj) -> { + logger.info(JsonUtil.prettyPrint(apiObj)); + String name = apiObj.getJsonString("name").getString(); + String httpmethod = apiObj.getJsonString("method").getString(); + int timeout = apiObj.getInt("timeOut"); + String urlTemplate = apiObj.getJsonString("urlTemplate").getString(); + logger.fine("URL Template: " + urlTemplate); + urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate; + String apiPath = replaceTokensWithValues(urlTemplate); + logger.fine("URL WithTokens: " + apiPath); + String url = apiPath; + // Sign if apiToken exists, otherwise send unsigned URL (i.e. for guest users) + ApiToken apiToken = getApiToken(); + logger.info("Fullkey create: " + System.getProperty(SystemConfig.API_SIGNING_SECRET, "") + getApiToken().getTokenString()); + if (apiToken != null) { + url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), httpmethod, + System.getProperty(SystemConfig.API_SIGNING_SECRET, "") + getApiToken().getTokenString()); } - } - if (!preview) { - return "?" + String.join("&", params); - } else { - return "?" 
+ String.join("&", params) + "&preview=true";
-        }
+        logger.fine("Signed URL: " + url);
+        apisBuilder.add(Json.createObjectBuilder().add("name", name).add("httpMethod", httpmethod)
+                .add("signedUrl", url).add("timeOut", timeout));
+    }));
+    bodyBuilder.add("signedUrls", apisBuilder);
+    return bodyBuilder;
     }
-
-    private String postFormData(String allowedApis ) throws IOException, InterruptedException{
+    private String postFormData(String allowedApis) throws IOException, InterruptedException {
         String url = null;
         HttpClient client = HttpClient.newHttpClient();
-        HttpRequest request = HttpRequest.newBuilder().POST(HttpRequest.BodyPublishers.ofString(allowedApis)).uri(URI.create(externalTool.getToolUrl()))
-                .header("Content-Type", "application/json")
-                .build();
+        HttpRequest request = HttpRequest.newBuilder().POST(HttpRequest.BodyPublishers.ofString(allowedApis))
+                .uri(URI.create(externalTool.getToolUrl())).header("Content-Type", "application/json").build();
         HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
-        boolean redirect=false;
+        boolean redirect = false;
         int status = response.statusCode();
         if (status != HttpURLConnection.HTTP_OK) {
-            if (status == HttpURLConnection.HTTP_MOVED_TEMP
-                    || status == HttpURLConnection.HTTP_MOVED_PERM
+            if (status == HttpURLConnection.HTTP_MOVED_TEMP || status == HttpURLConnection.HTTP_MOVED_PERM
                     || status == HttpURLConnection.HTTP_SEE_OTHER) {
                 redirect = true;
             }
         }
-        if (redirect==true){
+        if (redirect) {
             String newUrl = response.headers().firstValue("location").get();
             // toolContext = "http://" + response.uri().getAuthority();
-
+            url = newUrl;
         }
         return url;
     }
-
+
     public String getToolUrlWithQueryParams() {
         String params = ExternalToolHandler.this.handleRequest();
         return toolContext + params;
     }
-
+
     public String getToolUrlForPreviewMode() {
         return externalTool.getToolUrl() + handleRequest(true);
     }
@@ -199,9 +241,9 @@ public void setApiToken(ApiToken apiToken) {
     }

     /**
-     * @return Returns Javascript that opens the explore tool in a new browser
-     * tab if the browser allows it.If not, it shows an alert that popups must
-     * be enabled in the browser.
+     * @return Returns JavaScript that opens the explore tool in a new browser tab
+     *         if the browser allows it. If not, it shows an alert that popups must
+     *         be enabled in the browser.
*/ public String getExploreScript() { String toolUrl = this.getToolUrlWithQueryParams(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java index 00c93cda1f9..4acf2d544e8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -5,6 +5,9 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.json.Json; +import javax.json.JsonValue; + import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.FileMetadata; @@ -95,29 +98,17 @@ public ApiToken getApiToken() { public String getLocaleCode() { return localeCode; } - - public String getQueryParam(String key, String value) { - String tokenValue = null; - tokenValue = getTokenValue(value); - if (tokenValue != null) { - return key + '=' + tokenValue; - } else { - return null; - } - } - - public String getPostBodyParam(String key, String value) { + public JsonValue getParam(String value) { String tokenValue = null; tokenValue = getTokenValue(value); - if (tokenValue != null) { + if (tokenValue != null && !tokenValue.isBlank()) { try{ int x =Integer.parseInt(tokenValue); - return "\""+ key + "\"" + ':' + tokenValue; + return Json.createValue(x); } catch (NumberFormatException nfe){ - return "\""+ key + "\"" + ':' + "\"" + tokenValue + "\""; + return Json.createValue(tokenValue); } - } else { return null; } From a5ca4e29280556094bdde5c961f204f896f13fc8 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 27 Sep 2022 12:51:40 -0400 Subject: [PATCH 227/608] cleanup, note :me works in UI --- src/main/java/edu/harvard/iq/dataverse/api/Users.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Users.java b/src/main/java/edu/harvard/iq/dataverse/api/Users.java index 181458bfd6c..7568c7caff6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Users.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Users.java @@ -201,19 +201,17 @@ public Response getAuthenticatedUserByToken() { String tokenFromRequestAPI = getRequestApiKey(); AuthenticatedUser authenticatedUser = findUserByApiToken(tokenFromRequestAPI); + // This allows use of the :me API call from an active login session. 
Not sure
+        // this is a good idea
         if (authenticatedUser == null) {
             try {
                 authenticatedUser = findAuthenticatedUserOrDie();
-                return ok(json(authenticatedUser));
             } catch (WrappedResponse ex) {
                 Logger.getLogger(Users.class.getName()).log(Level.SEVERE, null, ex);
                 return error(Response.Status.BAD_REQUEST, "User with token " + tokenFromRequestAPI + " not found.");
             }
-
-        } else {
-            return ok(json(authenticatedUser));
         }
-
+        return ok(json(authenticatedUser));
     }

     @POST

From a413a13f3a04c2dca2e2c39d80791f1cf47a939a Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Tue, 27 Sep 2022 12:52:06 -0400
Subject: [PATCH 228/608] add tool callback methods for dataset and datafile

---
 .../harvard/iq/dataverse/api/Datasets.java | 38 +++++++++++++++++--
 .../edu/harvard/iq/dataverse/api/Files.java | 37 +++++++++++++++++-
 2 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
index aff543e643c..84f03ed275c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -57,10 +57,11 @@
 import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand;
 import edu.harvard.iq.dataverse.export.DDIExportServiceBean;
 import edu.harvard.iq.dataverse.export.ExportService;
+import edu.harvard.iq.dataverse.externaltools.ExternalTool;
+import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler;
 import edu.harvard.iq.dataverse.ingest.IngestServiceBean;
 import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
-
-import edu.harvard.iq.dataverse.S3PackageImporter;
+import edu.harvard.iq.dataverse.api.AbstractApiBean.WrappedResponse;
 import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO;
 import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
 import edu.harvard.iq.dataverse.dataaccess.DataAccess;
@@ -142,7 +143,6 @@
 import javax.ws.rs.core.*;
 import javax.ws.rs.core.Response.Status;
 import static javax.ws.rs.core.Response.Status.BAD_REQUEST;
-import javax.ws.rs.core.UriInfo;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.solr.client.solrj.SolrServerException;
@@ -3581,4 +3581,36 @@ private boolean isSingleVersionArchiving() {
         }
         return false;
     }
+
+    // This method provides a callback for an external tool to retrieve its
+    // parameters/api URLs. If the request is authenticated, e.g. by it being
+    // signed, the api URLs will be signed. If a guest request is made, the URLs
+    // will be plain/unsigned.
+    // This supports the cases where a tool is accessing a restricted resource (e.g.
+    // for a draft dataset) and the public case.
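+    // For example (illustrative values; the signature parameter names follow the
+    // assertions in ExternalToolHandlerTest), a signed dataset-scope callback has
+    // the form:
+    //   {siteUrl}/api/v1/datasets/{id}/versions/:latest/toolparams/{toolId}?user=...&method=GET&token=...
+    // and the response body is the JSON built by ExternalToolHandler.createPostBody(),
+    // i.e. {"queryParameters": {...}, "signedUrls": [...]}.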
+    @GET
+    @Path("{id}/versions/{version}/toolparams/{tid}")
+    public Response getExternalToolDVParams(@PathParam("tid") long externalToolId,
+            @PathParam("id") String datasetId, @PathParam("version") String version, @QueryParam(value = "locale") String locale) {
+        try {
+            DataverseRequest req = createDataverseRequest(findUserOrDie());
+            DatasetVersion target = getDatasetVersionOrDie(req, version, findDatasetOrDie(datasetId), null, null);
+            if (target == null) {
+                return error(BAD_REQUEST, "DatasetVersion not found.");
+            }
+
+            ExternalTool externalTool = externalToolService.findById(externalToolId);
+            ApiToken apiToken = null;
+            User u = findUserOrDie();
+            if (u instanceof AuthenticatedUser) {
+                apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) u);
+            }
+
+            ExternalToolHandler eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale);
+            return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters()))));
+        } catch (WrappedResponse wr) {
+            return wr.getResponse();
+        }
+    }
 }

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java
index 9dc0c3be524..1bfa9ee1d7b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java
@@ -14,6 +14,7 @@
 import edu.harvard.iq.dataverse.FileMetadata;
 import edu.harvard.iq.dataverse.TermsOfUseAndAccessValidator;
 import edu.harvard.iq.dataverse.UserNotificationServiceBean;
+import edu.harvard.iq.dataverse.authorization.users.ApiToken;
 import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
 import edu.harvard.iq.dataverse.authorization.users.User;
 import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper;
@@ -31,6 +32,8 @@
 import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand;
 import edu.harvard.iq.dataverse.export.ExportException;
 import edu.harvard.iq.dataverse.export.ExportService;
+import edu.harvard.iq.dataverse.externaltools.ExternalTool;
+import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler;
 import edu.harvard.iq.dataverse.ingest.IngestRequest;
 import edu.harvard.iq.dataverse.ingest.IngestServiceBean;
 import edu.harvard.iq.dataverse.ingest.IngestUtil;
@@ -40,6 +43,7 @@
 import edu.harvard.iq.dataverse.util.FileUtil;
 import edu.harvard.iq.dataverse.util.StringUtil;
 import edu.harvard.iq.dataverse.util.SystemConfig;
+import edu.harvard.iq.dataverse.util.json.JsonUtil;
 import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
 import java.io.InputStream;
 import java.io.StringReader;
@@ -451,7 +455,8 @@ public Response updateFileMetadata(@FormDataParam("jsonData") String jsonData,
 @GET
 @Path("{id}/metadata")
 public Response getFileMetadata(@PathParam("id") String fileIdOrPersistentId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, Boolean getDraft) throws WrappedResponse, Exception {
-        DataverseRequest req;
+        //ToDo - versionId is not used - can't get metadata for earlier versions
+        DataverseRequest req;
         try {
             req = createDataverseRequest(findUserOrDie());
         } catch (Exception e) {
@@ -639,4 +644,34 @@ private void exportDatasetMetadata(SettingsServiceBean settingsServiceBean, Data
         }
     }

+    // This method provides a callback for an external tool to retrieve its
+    // parameters/api URLs. If the request is authenticated, e.g. by it being
+    // signed, the api URLs will be signed. If a guest request is made, the URLs
+    // will be plain/unsigned.
+    // This supports the cases where a tool is accessing a restricted resource (e.g.
+    // a preview of a draft file) and the public case.
+    @GET
+    @Path("{id}/metadata/{fmid}/toolparams/{tid}")
+    public Response getExternalToolFMParams(@PathParam("tid") long externalToolId,
+            @PathParam("id") long fileId, @PathParam("fmid") long fmid, @QueryParam(value = "locale") String locale) {
+        try {
+            ExternalTool externalTool = externalToolService.findById(externalToolId);
+            ApiToken apiToken = null;
+            User u = findUserOrDie();
+            if (u instanceof AuthenticatedUser) {
+                apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) u);
+            }
+            FileMetadata target = fileSvc.findFileMetadata(fmid);
+            if (target == null) {
+                return error(BAD_REQUEST, "FileMetadata not found.");
+            }
+
+            ExternalToolHandler eth = new ExternalToolHandler(externalTool, target.getDataFile(), apiToken, target, locale);
+            return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters()))));
+        } catch (WrappedResponse wr) {
+            return wr.getResponse();
+        }
+    }
 }

From 2ed2414a622fe4e93dc2f05703b3d57eb41ccddb Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Tue, 27 Sep 2022 12:52:17 -0400
Subject: [PATCH 229/608] add flyway script

---
 .../db/migration/V5.12.0.1__7715-signed-urls-for-tools.sql | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 src/main/resources/db/migration/V5.12.0.1__7715-signed-urls-for-tools.sql

diff --git a/src/main/resources/db/migration/V5.12.0.1__7715-signed-urls-for-tools.sql b/src/main/resources/db/migration/V5.12.0.1__7715-signed-urls-for-tools.sql
new file mode 100644
index 00000000000..b47529800d3
--- /dev/null
+++ b/src/main/resources/db/migration/V5.12.0.1__7715-signed-urls-for-tools.sql
@@ -0,0 +1 @@
+ALTER TABLE externaltool ADD COLUMN IF NOT EXISTS allowedapicalls VARCHAR;

From 6b9d69f92198aebed388b94c8d5d00bb49b7dc4b Mon Sep 17 00:00:00 2001
From: Robert Treacy
Date: Mon, 26 Sep 2022 14:39:33 -0400
Subject: [PATCH 230/608] uses JsonObjectBuilder, eliminating some string
 building that was messy and brittle. Probably still a little string building
 could be cleaned up

---
 .../harvard/iq/dataverse/externaltools/ExternalToolHandler.java | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
index 085c2a7b3bb..6308b3bed1d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
+++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
@@ -168,7 +168,6 @@ public JsonObject getParams(JsonObject toolParameters) {

 public JsonObjectBuilder createPostBody(JsonObject params) {
 JsonObjectBuilder bodyBuilder = Json.createObjectBuilder();
-
 bodyBuilder.add("queryParameters", params);

 JsonArray apiArray = JsonUtil.getJsonArray(externalTool.getAllowedApiCalls());

From b098ba6646c3c9a344f6c7a51ac15bd057ef6345 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Tue, 27 Sep 2022 13:17:58 -0400
Subject: [PATCH 231/608] use httpMethod in toolParams and allowedapicalls

---
 .../harvard/iq/dataverse/externaltools/ExternalToolHandler.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
index 6308b3bed1d..5003a06692c 100644
---
a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -176,7 +176,7 @@ public JsonObjectBuilder createPostBody(JsonObject params) { apiArray.getValuesAs(JsonObject.class).forEach(((apiObj) -> { logger.info(JsonUtil.prettyPrint(apiObj)); String name = apiObj.getJsonString("name").getString(); - String httpmethod = apiObj.getJsonString("method").getString(); + String httpmethod = apiObj.getJsonString("httpMethod").getString(); int timeout = apiObj.getInt("timeOut"); String urlTemplate = apiObj.getJsonString("urlTemplate").getString(); logger.fine("URL Template: " + urlTemplate); From 48b2e04285b6e1c8be04865be74b6160851a99a1 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 27 Sep 2022 14:06:44 -0400 Subject: [PATCH 232/608] doc updates --- .../admin/dataverse-external-tools.tsv | 2 +- .../external-tools/dynamicDatasetTool.json | 12 ++++++-- .../root/external-tools/fabulousFileTool.json | 16 ++++++++-- .../source/api/external-tools.rst | 29 ++++++++++++++++++- 4 files changed, 53 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 61db5dfed93..fd1f0f27bc5 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -1,5 +1,5 @@ Tool Type Scope Description Data Explorer explore file A GUI which lists the variables in a tabular data file allowing searching, charting and cross tabulation analysis. See the README.md file at https://github.com/scholarsportal/dataverse-data-explorer-v2 for the instructions on adding Data Explorer to your Dataverse. Whole Tale explore dataset A platform for the creation of reproducible research packages that allows users to launch containerized interactive analysis environments based on popular tools such as Jupyter and RStudio. Using this integration, Dataverse users can launch Jupyter and RStudio environments to analyze published datasets. For more information, see the `Whole Tale User Guide `_. -File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, and GeoJSON - allowing them to be viewed without downloading. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers +File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, GeoJSON, and ZipFiles - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. 
Initial development was led by the Qualitative Data Repository and the spreadsheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers
 Data Curation Tool configure file A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions.

diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json
index e30c067a86b..47413c8a625 100644
--- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json
+++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json
@@ -12,8 +12,16 @@
       "PID": "{datasetPid}"
     },
     {
-      "apiToken": "{apiToken}"
+      "locale":"{localeCode}"
     }
-  ]
+  ],
+  "allowedApiCalls": [
+    {
+      "name":"retrieveDatasetJson",
+      "httpMethod":"GET",
+      "urlTemplate":"/api/v1/datasets/{datasetId}",
+      "timeOut":10
+    }
+  ]
   }
 }

diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json
index 14f71a280b3..83594929a96 100644
--- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json
+++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json
@@ -1,6 +1,6 @@
 {
   "displayName": "Fabulous File Tool",
-  "description": "Fabulous Fun for Files!",
+  "description": "A non-existent tool that is Fabulous Fun for Files!",
   "toolName": "fabulous",
   "scope": "file",
   "types": [
@@ -9,13 +9,25 @@
   ],
   "toolUrl": "https://fabulousfiletool.com",
   "contentType": "text/tab-separated-values",
+  "httpMethod":"GET",
   "toolParameters": {
     "queryParameters": [
       {
         "fileid": "{fileId}"
       },
       {
-        "key": "{apiToken}"
+        "datasetPid": "{datasetPid}"
       },
+      {
+        "locale":"{localeCode}"
+      }
     ],
+    "allowedApiCalls": [
+      {
+        "name":"retrieveDataFile",
+        "httpMethod":"GET",
+        "urlTemplate":"/api/v1/access/datafile/{fileId}",
+        "timeOut":270
+      }
     ]
   }

diff --git a/doc/sphinx-guides/source/api/external-tools.rst b/doc/sphinx-guides/source/api/external-tools.rst
index d72a6f62004..c5b1c43745e 100644
--- a/doc/sphinx-guides/source/api/external-tools.rst
+++ b/doc/sphinx-guides/source/api/external-tools.rst
@@ -92,7 +92,9 @@ Terminology

 contentType File level tools operate on a specific **file type** (content type or MIME type such as "application/pdf") and this must be specified. Dataset level tools do not use contentType.

- toolParameters **Query parameters** are supported and described below.
+ toolParameters **httpMethod**, **Query parameters**, and **allowedApiCalls** are supported and described below.
+
+ httpMethod Either **GET** or **POST**.

 queryParameters **Key/value combinations** that can be appended to the toolUrl. For example, once substitution takes place (described below) the user may be redirected to ``https://fabulousfiletool.com?fileId=42&siteUrl=http://demo.dataverse.org``.

 reserved words A **set of strings surrounded by curly braces** such as ``{fileId}`` or ``{datasetId}`` that will be inserted into query parameters. See the table below for a complete list.
+ allowedApiCalls An array of objects defining callbacks the tool is allowed to make to the Dataverse API. If the dataset or file being accessed is not public, the callback URLs will be signed to allow the tool access for a defined time.
+
+ allowedApiCalls name A name the tool will use to identify this callback URL.
+
+ allowedApiCalls urlTemplate The relative URL for the callback, using the reserved words to indicate where values should be dynamically substituted.
+
+ allowedApiCalls httpMethod Which HTTP method the specified callback uses.
+
+ allowedApiCalls timeOut For non-public datasets and datafiles, how long the signed URLs given to the tool should be valid for.
+
 toolName A **name** of an external tool that is used to differentiate between external tools and also used in bundle.properties for localization in the Dataverse installation web interface. For example, the toolName for Data Explorer is ``explorer``. For the Data Curation Tool the toolName is ``dct``. This is an optional parameter in the manifest JSON file.
 =========================== ==========
@@ -131,6 +143,21 @@ Reserved Words
 ``{localeCode}`` optional The code for the language ("en" for English, "fr" for French, etc.) that user has selected from the language toggle in a Dataverse installation. See also :ref:`i18n`.
 =========================== ========== ===========

+Authorization Options
++++++++++++++++++++++
+
+When called for Datasets or DataFiles that are not public, i.e. in a draft dataset or for a restricted file, external tools are allowed access via the user's credentials. This is accomplished by one of two mechanisms:
+
+* Signed URLs (more secure, recommended)
+
+  Configured via the allowedApiCalls section of the manifest. The tool will be provided with signed URLs allowing the specified access to the given dataset or datafile for the specified amount of time. The tool will not be able to access any other datasets or files the user may have access to and will not be able to make calls other than those specified.
+
+  For tools invoked via a GET call, Dataverse will include a callback query parameter with a Base64-encoded value. The decoded value is a signed URL that can be called to retrieve a JSON response containing all of the queryParameters and allowedApiCalls specified in the manifest.
+
+  For tools invoked via POST, Dataverse will send a JSON body including the requested queryParameters and allowedApiCalls. Dataverse expects the response to the POST to indicate a redirect which Dataverse will use to open the tool.
+
+* ApiToken (deprecated, less secure, not recommended)
+
+  Configured via the queryParameters by including an {apiToken} value. When this is present Dataverse will send the user's apiToken to the tool. With the user's apiToken, the tool can perform any action via the Dataverse API that the user could. External tools configured via this method should be assessed for their trustworthiness.
+
+  For tools invoked via GET, the apiToken is sent as a query parameter in the request URL, which could be cached in the browser's history. For tools invoked via POST, Dataverse will send a JSON body including the apiToken.
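+For illustration, a sketch of the JSON a tool receives (as the response from the decoded callback for GET tools, or as the POST body for POST tools), following the dynamicDatasetTool.json example above; the PID, site URL, dataset id, and signature parameter values shown are hypothetical::
+
+  {
+    "queryParameters": {
+      "PID": "doi:10.5072/FK2/ABCDEF",
+      "locale": "en"
+    },
+    "signedUrls": [
+      {
+        "name": "retrieveDatasetJson",
+        "httpMethod": "GET",
+        "signedUrl": "https://demo.dataverse.org/api/v1/datasets/42?until=2022-09-28T10:00:00.000&user=dataverseAdmin&method=GET&token=...",
+        "timeOut": 10
+      }
+    ]
+  }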
+ Internationalization of Your External Tool
 ++++++++++++++++++++++++++++++++++++++++++

From 2eb90f18ca579f82ef707c96727f0df70bfc4211 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Tue, 27 Sep 2022 14:06:58 -0400
Subject: [PATCH 233/608] use java.util.Base64

---
 .../iq/dataverse/externaltools/ExternalToolHandler.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
index 5003a06692c..26f0f5a7c4b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
+++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
@@ -16,6 +16,7 @@
 import java.net.http.HttpClient;
 import java.net.http.HttpRequest;
 import java.net.http.HttpResponse;
+import java.util.Base64;
 import java.util.Map.Entry;
 import java.util.logging.Level;
 import java.util.logging.Logger;
@@ -32,8 +33,6 @@

 import org.apache.commons.codec.binary.StringUtils;

-import com.github.scribejava.core.java8.Base64;
-
 /**
  * Handles an operation on a specific file. Requires a file id in order to be
  * instantiated. Applies logic based on an {@link ExternalTool} specification,
@@ -148,6 +147,7 @@ public String handleRequest(boolean preview) {
 }

 public JsonObject getParams(JsonObject toolParameters) {
+        //ToDo - why an array of objects, each with a single key/value pair, instead of one object?
 JsonArray queryParams = toolParameters.getJsonArray("queryParameters");

 // ToDo return json and print later

From 86f910dc276fdc1a464a0edcda8a86e2f5b68b45 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Tue, 27 Sep 2022 14:11:00 -0400
Subject: [PATCH 234/608] release note

---
 doc/release-notes/7715-signed-urls-for-external-tools.md | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 doc/release-notes/7715-signed-urls-for-external-tools.md

diff --git a/doc/release-notes/7715-signed-urls-for-external-tools.md b/doc/release-notes/7715-signed-urls-for-external-tools.md
new file mode 100644
index 00000000000..00b5cff24b3
--- /dev/null
+++ b/doc/release-notes/7715-signed-urls-for-external-tools.md
@@ -0,0 +1,3 @@
+# Improved Security for External Tools
+
+This release adds support for configuring external tools to use signed URLs to access the Dataverse API. This eliminates the need for tools to have access to the user's apiToken in order to access draft or restricted datasets and datafiles.
\ No newline at end of file From 6a9ab48a42d48a34346eb2c6838dbf1c590c6ee7 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 27 Sep 2022 15:27:53 -0400 Subject: [PATCH 235/608] Tests --- .../ExternalToolHandlerTest.java | 36 ++++++++++++++++ .../ExternalToolServiceBeanTest.java | 43 +++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java index 8e70934b4ad..70c835839bb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java @@ -6,9 +6,16 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import javax.json.Json; +import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; + import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import java.util.ArrayList; import java.util.List; @@ -198,4 +205,33 @@ public void testGetToolUrlWithOptionalQueryParameters() { } + @Test + public void testGetToolUrlWithallowedApiCalls() { + + System.out.println("allowedApiCalls test"); + Dataset ds = new Dataset(); + ds.setId(1L); + ApiToken at = new ApiToken(); + AuthenticatedUser au = new AuthenticatedUser(); + au.setUserIdentifier("dataverseAdmin"); + at.setAuthenticatedUser(au); + at.setTokenString("1234"); + ExternalTool et = ExternalToolServiceBeanTest.getAllowedApiCallsTool(); + assertTrue(et != null); + System.out.println("allowedApiCalls et created"); + System.out.println(et.getAllowedApiCalls()); + ExternalToolHandler externalToolHandler = new ExternalToolHandler(et, ds, at, null); + System.out.println("allowedApiCalls eth created"); + JsonObject jo = externalToolHandler + .createPostBody(externalToolHandler.getParams(JsonUtil.getJsonObject(et.getToolParameters()))).build(); + assertEquals(1, jo.getJsonObject("queryParameters").getInt("datasetId")); + String signedUrl = jo.getJsonArray("signedUrls").getJsonObject(0).getString("signedUrl"); + // The date and token will change each time but check for the constant parts of + // the response + assertTrue(signedUrl.contains("https://librascholar.org/api/v1/datasets/1")); + assertTrue(signedUrl.contains("&user=dataverseAdmin")); + assertTrue(signedUrl.contains("&method=GET")); + assertTrue(signedUrl.contains("&token=")); + System.out.println(JsonUtil.prettyPrint(jo)); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java index 304898f0fb0..74e10d67352 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java @@ -501,4 +501,47 @@ public void testParseAddToolWithLegacyType() { assertNull(externalTool.getContentType()); } + @Test + public void testParseAddDatasetToolAllowedApiCalls() { + + ExternalTool externalTool = null; + try { + externalTool = getAllowedApiCallsTool(); + } catch (Exception ex) { + System.out.println(ex.getMessage()); + } + assertNotNull(externalTool); + 
assertNull(externalTool.getContentType()); + } + + protected static ExternalTool getAllowedApiCallsTool() { + JsonObjectBuilder job = Json.createObjectBuilder(); + job.add("displayName", "AwesomeTool"); + job.add("toolName", "explorer"); + job.add("description", "This tool is awesome."); + job.add("types", Json.createArrayBuilder().add("explore")); + job.add("scope", "dataset"); + job.add("toolUrl", "http://awesometool.com"); + job.add("hasPreviewMode", "true"); + + job.add("toolParameters", Json.createObjectBuilder() + .add("httpMethod", "GET") + .add("queryParameters", + Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("datasetId", "{datasetId}") + ) + ) + ).add("allowedApiCalls", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("name", "getDataset") + .add("httpMethod", "GET") + .add("urlTemplate", "/api/v1/datasets/{datasetId}") + .add("timeOut", 10)) + ); + String tool = job.build().toString(); + System.out.println("tool: " + tool); + + return ExternalToolServiceBean.parseAddExternalToolManifest(tool); + } } From 4da5be02ecdf35d7b6e513fd055475afd801c23f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 27 Sep 2022 15:37:18 -0400 Subject: [PATCH 236/608] remove test logging --- .../harvard/iq/dataverse/externaltools/ExternalToolHandler.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index 26f0f5a7c4b..7f2087c1e31 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -186,7 +186,6 @@ public JsonObjectBuilder createPostBody(JsonObject params) { String url = apiPath; // Sign if apiToken exists, otherwise send unsigned URL (i.e. 
for guest users) ApiToken apiToken = getApiToken(); - logger.info("Fullkey create: " + System.getProperty(SystemConfig.API_SIGNING_SECRET, "") + getApiToken().getTokenString()); if (apiToken != null) { url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), httpmethod, System.getProperty(SystemConfig.API_SIGNING_SECRET, "") + getApiToken().getTokenString()); From 724f88deca52645b8f17852f8a2b641e0fb81e31 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 28 Sep 2022 09:41:11 -0400 Subject: [PATCH 237/608] remove toolContext - wasn't set for dataset tools, isn't needed --- .../iq/dataverse/externaltools/ExternalToolHandler.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index 7f2087c1e31..c9da22081b9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -43,7 +43,6 @@ public class ExternalToolHandler extends URLTokenUtil { private final ExternalTool externalTool; private String requestMethod; - private String toolContext; /** * File level tool @@ -57,7 +56,6 @@ public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToke FileMetadata fileMetadata, String localeCode) { super(dataFile, apiToken, fileMetadata, localeCode); this.externalTool = externalTool; - toolContext = externalTool.getToolUrl(); } /** @@ -223,7 +221,7 @@ private String postFormData(String allowedApis) throws IOException, InterruptedE public String getToolUrlWithQueryParams() { String params = ExternalToolHandler.this.handleRequest(); - return toolContext + params; + return externalTool.getToolUrl() + params; } public String getToolUrlForPreviewMode() { From 88bc73c14855697d5032a82acf7cd5b1df115330 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 28 Sep 2022 12:14:25 -0400 Subject: [PATCH 238/608] handle unsigned urls --- .../java/edu/harvard/iq/dataverse/api/AbstractApiBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 22d1f668949..a6e1f4d9ef1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -420,11 +420,11 @@ private AuthenticatedUser findAuthenticatedUserOrDie( String key, String wfid ) } else { throw new WrappedResponse(badWFKey(wfid)); } - } else { + } else if (getRequestParameter("token") != null) { AuthenticatedUser authUser = getAuthenticatedUserFromSignedUrl(); if (authUser != null) { return authUser; - } + } } //Just send info about the apiKey - workflow users will learn about invocationId elsewhere throw new WrappedResponse(badApiKey(null)); From 3ea4e92b48452c3785f3e7c60df4acdf40f8bd1e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 09:21:16 -0400 Subject: [PATCH 239/608] todo is done --- src/main/java/edu/harvard/iq/dataverse/api/Files.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 9dc0c3be524..d1ecd2d8824 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -231,7 +231,6 @@ 
public Response replaceFileInDataset( if (null == contentDispositionHeader) { if (optionalFileParams.hasStorageIdentifier()) { newStorageIdentifier = optionalFileParams.getStorageIdentifier(); - // ToDo - check that storageIdentifier is valid if (optionalFileParams.hasFileName()) { newFilename = optionalFileParams.getFileName(); if (optionalFileParams.hasMimetype()) { From cb5007a6a5ad46e27dce34dbd5c2bd16bdc9044e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 09:21:37 -0400 Subject: [PATCH 240/608] add getjsonarray --- .../java/edu/harvard/iq/dataverse/util/json/JsonUtil.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index f4a3c635f8b..21ff0e03773 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -63,4 +63,10 @@ public static javax.json.JsonObject getJsonObject(String serializedJson) { return Json.createReader(rdr).readObject(); } } + + public static javax.json.JsonArray getJsonArray(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + return Json.createReader(rdr).readArray(); + } + } } From e06ec36b2a4a78e8c64e42858542faaccf62841b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 10:04:55 -0400 Subject: [PATCH 241/608] Add /replaceFiles call refactor to make multifile a separate boolean remove unused LicenseBean from constructor updated /addFiles logic to use clone refactored steps 70/80 to work for multi-replace. i.e. by tracking filesToDelete and the physical files to delete. replace local Json readers with JsonUtil method move sanity check on file deletes to DataFileServiceBean --- .../iq/dataverse/DataFileServiceBean.java | 4 + .../iq/dataverse/EditDatafilesPage.java | 3 +- .../harvard/iq/dataverse/api/Datasets.java | 77 +++- .../edu/harvard/iq/dataverse/api/Files.java | 3 +- .../datasetutility/AddReplaceFileHelper.java | 415 +++++++++++++----- 5 files changed, 375 insertions(+), 127 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 0b935183182..7da06f36be4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1544,6 +1544,10 @@ public void finalizeFileDelete(Long dataFileId, String storageLocation) throws I throw new IOException("Attempted to permanently delete a physical file still associated with an existing DvObject " + "(id: " + dataFileId + ", location: " + storageLocation); } + if(storageLocation == null || storageLocation.isBlank()) { + throw new IOException("Attempted to delete a physical file with no location " + + "(id: " + dataFileId + ", location: " + storageLocation); + } StorageIO directStorageAccess = DataAccess.getDirectStorageIO(storageLocation); directStorageAccess.delete(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 6cf294ffd6d..f5e137a1981 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -586,8 +586,7 @@ public String init() { datafileService, permissionService, commandEngine, - systemConfig, - licenseServiceBean); + systemConfig); fileReplacePageHelper = new 
FileReplacePageHelper(addReplaceFileHelper, dataset, diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index aff543e643c..ed54704c4a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2451,8 +2451,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, fileService, permissionSvc, commandEngine, - systemConfig, - licenseSvc); + systemConfig); //------------------- @@ -3387,14 +3386,84 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, this.fileService, this.permissionSvc, this.commandEngine, - this.systemConfig, - this.licenseSvc + this.systemConfig ); return addFileHelper.addFiles(jsonData, dataset, authUser); } + /** + * Replace multiple Files to an existing Dataset + * + * @param idSupplied + * @param jsonData + * @return + */ + @POST + @Path("{id}/replaceFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response replaceFilesInDataset(@PathParam("id") String idSupplied, + @FormDataParam("jsonData") String jsonData) { + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(idSupplied); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + dataset.getLocks().forEach(dl -> { + logger.info(dl.toString()); + }); + + //------------------------------------ + // (2a) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + DataverseRequest dvRequest = createDataverseRequest(authUser); + + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dvRequest, + this.ingestService, + this.datasetService, + this.fileService, + this.permissionSvc, + this.commandEngine, + this.systemConfig + ); + + return addFileHelper.replaceFiles(jsonData, dataset, authUser); + + } + /** * API to find curation assignments and statuses * diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index d1ecd2d8824..ecb40af19f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -256,8 +256,7 @@ public Response replaceFileInDataset( this.fileService, this.permissionSvc, this.commandEngine, - this.systemConfig, - this.licenseSvc); + this.systemConfig); // (5) Run "runReplaceFileByDatasetId" long fileToReplaceId = 0; diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 8e7922fd83b..207f1e309be 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -26,20 +26,22 @@ import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.json.JsonPrinter; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.logging.Level; @@ -47,10 +49,10 @@ import javax.ejb.EJBException; import javax.json.Json; import javax.json.JsonArrayBuilder; +import javax.json.JsonNumber; import javax.json.JsonObject; import javax.json.JsonArray; import javax.json.JsonObjectBuilder; -import javax.json.JsonReader; import javax.validation.ConstraintViolation; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; @@ -114,10 +116,9 @@ public class AddReplaceFileHelper{ public static String FILE_ADD_OPERATION = "FILE_ADD_OPERATION"; public static String FILE_REPLACE_OPERATION = "FILE_REPLACE_OPERATION"; public static String FILE_REPLACE_FORCE_OPERATION = "FILE_REPLACE_FORCE_OPERATION"; - public static String MULTIPLEFILES_ADD_OPERATION = "MULTIPLEFILES_ADD_OPERATION"; - + private String currentOperation; - + boolean multifile = false; // ----------------------------------- // All the needed EJBs, passed to the constructor // ----------------------------------- @@ -127,8 +128,6 @@ public class AddReplaceFileHelper{ private PermissionServiceBean permissionService; private EjbDataverseEngine commandEngine; private SystemConfig systemConfig; - private LicenseServiceBean licenseServiceBean; - // ----------------------------------- // Instance variables directly added // ----------------------------------- @@ -144,10 +143,6 @@ public class AddReplaceFileHelper{ // -- Optional private DataFile fileToReplace; // step 25 - // ----------------------------------- - // Instance variables derived from other input - // ----------------------------------- - private User user; private DatasetVersion workingVersion; private DatasetVersion clone; List initialFileList; @@ -256,13 +251,12 @@ public void resetFileHelper(){ * @param dvRequest */ public AddReplaceFileHelper(DataverseRequest dvRequest, - IngestServiceBean ingestService, + IngestServiceBean ingestService, DatasetServiceBean datasetService, DataFileServiceBean fileService, PermissionServiceBean permissionService, EjbDataverseEngine commandEngine, - SystemConfig systemConfig, - LicenseServiceBean licenseServiceBean){ + SystemConfig systemConfig){ // --------------------------------- // make sure DataverseRequest isn't null and has a user @@ -304,16 +298,12 @@ public AddReplaceFileHelper(DataverseRequest dvRequest, this.permissionService = permissionService; this.commandEngine = commandEngine; this.systemConfig = systemConfig; - this.licenseServiceBean = licenseServiceBean; - - - initErrorHandling(); // Initiate instance vars this.dataset = null; this.dvRequest 
= dvRequest; - this.user = dvRequest.getUser(); + dvRequest.getUser(); } @@ -336,7 +326,7 @@ public boolean runAddFileByDataset(Dataset chosenDataset, } - public boolean runAddFileByDataset(Dataset chosenDataset, + private boolean runAddFileByDataset(Dataset chosenDataset, String newFileName, String newFileContentType, String newStorageIdentifier, @@ -348,12 +338,8 @@ public boolean runAddFileByDataset(Dataset chosenDataset, initErrorHandling(); - if(multipleFiles) { - this.currentOperation = MULTIPLEFILES_ADD_OPERATION; - } - else { - this.currentOperation = FILE_ADD_OPERATION; - } + multifile=multipleFiles; + this.currentOperation = FILE_ADD_OPERATION; if (!this.step_001_loadDataset(chosenDataset)){ return false; @@ -393,6 +379,11 @@ public boolean runAddFile(Dataset dataset, }*/ + public boolean runForceReplaceFile(long fileToReplaceId, String newFilename, String newFileContentType, + String newStorageIdentifier, InputStream newFileInputStream, OptionalFileParams optionalFileParams) { + return runForceReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, newFileInputStream, optionalFileParams, false); + } /** * After the constructor, this method is called to replace a file * @@ -403,16 +394,18 @@ public boolean runAddFile(Dataset dataset, * @param newFileInputStream * @return */ - public boolean runForceReplaceFile(Long oldFileId, + private boolean runForceReplaceFile(Long oldFileId, String newFileName, String newFileContentType, String newStorageIdentifier, InputStream newFileInputStream, - OptionalFileParams optionalFileParams){ + OptionalFileParams optionalFileParams, + boolean multipleFiles){ msgt(">> runForceReplaceFile"); initErrorHandling(); + multifile=multipleFiles; this.currentOperation = FILE_REPLACE_FORCE_OPERATION; @@ -432,16 +425,25 @@ public boolean runForceReplaceFile(Long oldFileId, } - public boolean runReplaceFile(Long oldFileId, + public boolean runReplaceFile(long fileToReplaceId, String newFilename, String newFileContentType, + String newStorageIdentifier, InputStream newFileInputStream, OptionalFileParams optionalFileParams) { + return runReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, newFileInputStream, optionalFileParams, false); + + } + + private boolean runReplaceFile(Long oldFileId, String newFileName, String newFileContentType, String newStorageIdentifier, InputStream newFileInputStream, - OptionalFileParams optionalFileParams){ + OptionalFileParams optionalFileParams, + boolean multipleFiles){ msgt(">> runReplaceFile"); initErrorHandling(); + multifile=multipleFiles; this.currentOperation = FILE_REPLACE_OPERATION; if (oldFileId==null){ @@ -759,19 +761,15 @@ private boolean runAddReplacePhase2(boolean tabIngest){ return false; } - - if (this.isFileReplaceOperation()){ + if (this.isFileReplaceOperation()) { msgt("step_080_run_update_dataset_command_for_replace"); - if (!this.step_080_run_update_dataset_command_for_replace()){ - return false; + if (!this.step_080_run_update_dataset_command_for_replace()) { + return false; } - - }else{ + } else if (!multifile) { msgt("step_070_run_update_dataset_command"); - if (!this.isMultipleFilesAddOperation()) { - if (!this.step_070_run_update_dataset_command()) { - return false; - } + if (!this.step_070_run_update_dataset_command()) { + return false; } } @@ -834,16 +832,6 @@ public boolean isFileAddOperation(){ return this.currentOperation.equals(FILE_ADD_OPERATION); } - /** - * Is this a multiple files add operation ? 
- * @return - */ - - public boolean isMultipleFilesAddOperation(){ - - return this.currentOperation.equals(MULTIPLEFILES_ADD_OPERATION); - } - /** * Initialize error handling vars */ @@ -1201,7 +1189,10 @@ private boolean step_030_createNewFilesViaIngest(){ // Load the working version of the Dataset workingVersion = dataset.getEditVersion(); - clone = workingVersion.cloneDatasetVersion(); + if(!multifile) { + //Don't repeatedly update the clone (losing changes) in multifile case + clone = workingVersion.cloneDatasetVersion(); + } try { CreateDataFileResult result = FileUtil.createDataFiles(workingVersion, this.newFileInputStream, @@ -1292,9 +1283,6 @@ private boolean step_040_auto_checkForDuplicates(){ // Initialize new file list this.finalFileList = new ArrayList<>(); - String warningMessage = null; - - if (isFileReplaceOperation() && this.fileToReplace == null){ // This error shouldn't happen if steps called correctly this.addErrorSevere(getBundleErr("existing_file_to_replace_is_null") + " (This error shouldn't happen if steps called in sequence....checkForFileReplaceDuplicate)"); @@ -1511,10 +1499,7 @@ private boolean step_050_checkForConstraintViolations(){ return true; } - // ----------------------------------------------------------- - // violations found: gather all error messages - // ----------------------------------------------------------- - List errMsgs = new ArrayList<>(); + new ArrayList<>(); for (ConstraintViolation violation : constraintViolations) { /* for 8859 return conflict response status if the validation fails @@ -1605,70 +1590,81 @@ private boolean step_060_addFilesViaIngestService(boolean tabIngest){ return true; } + List filesToDelete = new ArrayList(); + Map deleteFileStorageLocations = new HashMap<>(); /** * Create and run the update dataset command * * @return */ - private boolean step_070_run_update_dataset_command(){ - - if (this.hasError()){ + private boolean step_070_run_update_dataset_command() { + //Note -only single file operations and multifile replace call this, multifile add does not + if (this.hasError()) { return false; } - Command update_cmd; + Command update_cmd = null; String deleteStorageLocation = null; - long deleteFileId=-1; - if(isFileReplaceOperation()) { - List filesToDelete = new ArrayList(); + long deleteFileId = -1; + if (isFileReplaceOperation()) { + if (!multifile) { + filesToDelete.clear(); + deleteFileStorageLocations.clear(); + } filesToDelete.add(fileToReplace.getFileMetadata()); - - if(!fileToReplace.isReleased()) { - //If file is only in draft version, also need to delete the physical file - deleteStorageLocation = fileService.getPhysicalFileToDelete(fileToReplace); - deleteFileId=fileToReplace.getId(); + + if (!fileToReplace.isReleased()) { + // If file is only in draft version, also need to delete the physical file + deleteStorageLocation = fileService.getPhysicalFileToDelete(fileToReplace); + deleteFileId = fileToReplace.getId(); + deleteFileStorageLocations.put(deleteFileId, deleteStorageLocation); + } + if (!multifile) { + // Adding the file to the delete list for the command will delete this + // filemetadata and, if the file hasn't been released, the datafile itself. + update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); } - //Adding the file to the delete list for the command will delete this filemetadata and, if the file hasn't been released, the datafile itself. 
- update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); } else { - update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); + update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); } - ((UpdateDatasetVersionCommand) update_cmd).setValidateLenient(true); - - try { - // Submit the update dataset command - // and update the local dataset object - // - dataset = commandEngine.submit(update_cmd); - } catch (CommandException ex) { - /** - * @todo Add a test to exercise this error. - */ - this.addErrorSevere(getBundleErr("add.add_file_error")); - logger.severe(ex.getMessage()); - return false; - }catch (EJBException ex) { - /** - * @todo Add a test to exercise this error. - */ - this.addErrorSevere("add.add_file_error (see logs)"); - logger.severe(ex.getMessage()); - return false; + if (!multifile) { + //Avoid NPE in multifile replace case + ((UpdateDatasetVersionCommand) update_cmd).setValidateLenient(true); } - //Sanity check - if(isFileReplaceOperation()) { - if (deleteStorageLocation != null) { - // Finalize the delete of the physical file - // (File service will double-check that the datafile no - // longer exists in the database, before proceeding to - // delete the physical file) - try { - fileService.finalizeFileDelete(deleteFileId, deleteStorageLocation); - } catch (IOException ioex) { - logger.warning("Failed to delete the physical file associated with the deleted datafile id=" - + deleteFileId + ", storage location: " + deleteStorageLocation); - } + if (!multifile) { + try { + // Submit the update dataset command + // and update the local dataset object + // + dataset = commandEngine.submit(update_cmd); + } catch (CommandException ex) { + /** + * @todo Add a test to exercise this error. + */ + this.addErrorSevere(getBundleErr("add.add_file_error")); + logger.severe(ex.getMessage()); + return false; + } catch (EJBException ex) { + /** + * @todo Add a test to exercise this error. + */ + this.addErrorSevere("add.add_file_error (see logs)"); + logger.severe(ex.getMessage()); + return false; + } + } + + if (isFileReplaceOperation() && !multifile) { + // Finalize the delete of the physical file + // (File service will double-check that the datafile no + // longer exists in the database, before proceeding to + // delete the physical file) + try { + fileService.finalizeFileDelete(deleteFileId, deleteStorageLocation); + } catch (IOException ioex) { + logger.warning("Failed to delete the physical file associated with the deleted datafile id=" + + deleteFileId + ", storage location: " + deleteStorageLocation); } } return true; @@ -1766,7 +1762,7 @@ private boolean step_080_run_update_dataset_command_for_replace(){ } /* - * Go through the final file list, settting the rootFileId and previousFileId + * Go through the final file list, setting the rootFileId and previousFileId */ for (DataFile df : finalFileList) { df.setPreviousDataFileId(fileToReplace.getId()); @@ -1927,7 +1923,7 @@ private boolean step_100_startIngestJobs(){ //return true; //} - if (!this.isMultipleFilesAddOperation()) { + if (!multifile) { msg("pre ingest start"); // start the ingest! ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); @@ -2021,6 +2017,13 @@ public void setDuplicateFileWarning(String duplicateFileWarning) { this.duplicateFileWarning = duplicateFileWarning; } + /** Add multiple pre-positioned files listed in the jsonData. Works with direct upload, Globus, and other out-of-band methods. 
+ * + * @param jsonData - an array of jsonData entries (one per file) using the single add file jsonData format + * @param dataset + * @param authUser + * @return + */ public Response addFiles(String jsonData, Dataset dataset, User authUser) { msgt("(addFilesToDataset) jsonData: " + jsonData.toString()); @@ -2033,15 +2036,14 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { // ----------------------------------------------------------- // Read jsonData and Parse files information from jsondata : // ----------------------------------------------------------- - try (StringReader rdr = new StringReader(jsonData)) { - JsonReader dbJsonReader = Json.createReader(rdr); - filesJson = dbJsonReader.readArray(); - dbJsonReader.close(); + try { + filesJson = JsonUtil.getJsonArray(jsonData); if (filesJson != null) { totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); - + workingVersion = dataset.getEditVersion(); + clone = workingVersion.cloneDatasetVersion(); for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { OptionalFileParams optionalFileParams = null; @@ -2131,7 +2133,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } try { - Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest); + Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); commandEngine.submit(cmd); } catch (CommandException ex) { @@ -2140,9 +2142,6 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { dataset = datasetService.find(dataset.getId()); - List s = dataset.getFiles(); - for (DataFile dataFile : s) { - } //ingest job ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); @@ -2166,6 +2165,184 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { .add("status", STATUS_OK) .add("data", Json.createObjectBuilder().add("Files", jarr).add("Result", result)).build() ).build(); } + + /** + * Replace multiple files with prepositioned replacements as listed in the + * jsonData. Works with direct upload, Globus, and other out-of-band methods. 
+ * + * @param jsonData - must include fileToReplaceId key with file ID and may include forceReplace key with true/false(default) + * @param dataset + * @param authUser + * @return + */ + + public Response replaceFiles(String jsonData, Dataset dataset, User authUser) { + msgt("(replaceFilesInDataset) jsonData: " + jsonData.toString()); + + JsonArrayBuilder jarr = Json.createArrayBuilder(); + + JsonArray filesJson = null; + + int totalNumberofFiles = 0; + int successNumberofFiles = 0; + // ----------------------------------------------------------- + // Read jsonData and Parse files information from jsondata : + // ----------------------------------------------------------- + try { + filesJson = JsonUtil.getJsonArray(jsonData); + + + if (filesJson != null) { + totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); + workingVersion = dataset.getEditVersion(); + clone = workingVersion.cloneDatasetVersion(); + for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { + boolean forceReplace = false; + // (2a) Check for optional "forceReplace" + if ((fileJson.containsKey("forceReplace"))) { + forceReplace = fileJson.getBoolean("forceReplace", false); + } + long fileToReplaceId = -1; + JsonNumber ftri = fileJson.getJsonNumber("fileToReplaceId"); + if(ftri !=null) { + fileToReplaceId = ftri.longValueExact(); + } + + OptionalFileParams optionalFileParams = null; + try { + // (2b) Load up optional params via JSON + // - Will skip extra attributes which includes fileToReplaceId and forceReplace + optionalFileParams = new OptionalFileParams(fileJson.toString()); + + String newFilename = null; + String newFileContentType = null; + String newStorageIdentifier = null; + if ((fileToReplaceId !=-1) && optionalFileParams.hasStorageIdentifier()) { + newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + newStorageIdentifier = DataAccess.expandStorageIdentifierIfNeeded(newStorageIdentifier); + if(!DataAccess.uploadToDatasetAllowed(dataset, newStorageIdentifier)) { + addErrorSevere("Dataset store configuration does not allow provided storageIdentifier."); + } + if (optionalFileParams.hasFileName()) { + newFilename = optionalFileParams.getFileName(); + if (optionalFileParams.hasMimetype()) { + newFileContentType = optionalFileParams.getMimeType(); + } + } + + msgt("REPLACE! 
= " + newFilename); + if (!hasError()) { + if (forceReplace){ + runForceReplaceFile(fileToReplaceId, + newFilename, + newFileContentType, + newStorageIdentifier, + null, + optionalFileParams, true); + }else{ + runReplaceFile(fileToReplaceId, + newFilename, + newFileContentType, + newStorageIdentifier, + null, + optionalFileParams, true); + } + } + if (hasError()) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("errorMessage", getHttpErrorCode().toString() +":"+ getErrorMessagesAsString("\n")) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + } else { + JsonObject successresult = getSuccessResultAsJsonObjectBuilder().build(); + String duplicateWarning = getDuplicateFileWarning(); + + if (duplicateWarning != null && !duplicateWarning.isEmpty()) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("warningMessage", getDuplicateFileWarning()) + .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("successMessage", "Replaced successfully in the dataset") + .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } + successNumberofFiles = successNumberofFiles + 1; + } + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorMessage", "You must provide a fileToReplaceId, storageidentifier, filename, and mimetype.") + .add("fileDetails", fileJson); + + jarr.add(fileoutput); + } + + } catch (DataFileTagException ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) + .add("message", ex.getMessage()) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + + } + catch (NoFilesException ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) + .add("message", BundleUtil.getStringFromBundle("NoFileException! Serious Error! 
See administrator!")) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + } + + }// End of adding files + + DatasetLock eipLock = dataset.getLockFor(DatasetLock.Reason.EditInProgress); + if (eipLock == null) { + logger.warning("Dataset not locked for EditInProgress "); + } else { + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + logger.info("Removed EditInProgress lock "); + } + + try { + Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); + ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); + commandEngine.submit(cmd); + } catch (CommandException ex) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "CommandException updating DatasetVersion from addFiles job: " + ex.getMessage()); + } + + fileService.finalizeFileDeletes(deleteFileStorageLocations); + + dataset = datasetService.find(dataset.getId()); + + //ingest job + ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + + } + } + catch ( javax.json.stream.JsonParsingException ex) { + ex.printStackTrace(); + return error(BAD_REQUEST, "Json Parsing Exception :" + ex.getMessage()); + } + catch (Exception e) { + e.printStackTrace(); + return error(BAD_REQUEST, e.getMessage()); + } + + JsonObjectBuilder result = Json.createObjectBuilder() + .add("Total number of files", totalNumberofFiles) + .add("Number of files successfully replaced", successNumberofFiles); + + return Response.ok().entity(Json.createObjectBuilder() + .add("status", STATUS_OK) + .add("data", Json.createObjectBuilder().add("Files", jarr).add("Result", result)).build() ).build(); + } protected static Response error(Response.Status sts, String msg ) { return Response.status(sts) From e6bd5b3d63f4655a48080cdcda284e7507f9fd3f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 10:27:54 -0400 Subject: [PATCH 242/608] docs --- .../developers/s3-direct-upload-api.rst | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst index 3dc73ce6a0c..b29b3421900 100644 --- a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst +++ b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst @@ -122,7 +122,7 @@ To add multiple Uploaded Files to the Dataset --------------------------------------------- Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: +jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: * "description" - A description of the file * "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset @@ -154,7 +154,7 @@ Replacing an existing file in the Dataset ----------------------------------------- Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. 
This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter.
-jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must also include values for:
+jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must include values for:
 
 * "storageIdentifier" - String, as specified in prior calls
 * "fileName" - String
@@ -178,3 +178,37 @@ Note that the API call does not validate that the file matches the hash value su
 
 Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method.
 
 With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above.
+
+Replacing multiple existing files in the Dataset
+------------------------------------------------
+
+Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter.
+jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must include some additional values:
+
+* "fileToReplaceId" - the id of the file being replaced
+* "forceReplace" - whether to replace a file with one of a different mimetype (optional, default is false)
+* "description" - A description of the file
+* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset
+* "storageIdentifier" - String
+* "fileName" - String
+* "mimeType" - String
+* "fixity/checksum" either:
+
+  * "md5Hash" - String with MD5 hash value, or
+  * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings
+
+
+The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512
+
+.. code-block:: bash
+
+   export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+   export SERVER_URL=https://demo.dataverse.org
+   export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV
+   export JSON_DATA="[{'fileToReplaceId': 10, 'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \
+   {'fileToReplaceId': 10, 'forceReplace': true, 'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]"
+
+   curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA"
+
+Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exist in S3/have been uploaded via some out-of-band method.
+With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above.
From 088cf8ac0248466b03bc2ae07e6c1d1439154f62 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Fri, 30 Sep 2022 10:31:24 -0400
Subject: [PATCH 243/608] release note

---
 doc/release-notes/9005-replaceFiles-api-call | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 doc/release-notes/9005-replaceFiles-api-call

diff --git a/doc/release-notes/9005-replaceFiles-api-call b/doc/release-notes/9005-replaceFiles-api-call
new file mode 100644
index 00000000000..b1df500251e
--- /dev/null
+++ b/doc/release-notes/9005-replaceFiles-api-call
@@ -0,0 +1,3 @@
+9005
+
+Direct upload and out-of-band uploads can now be used to replace multiple files with one API call (complementing the prior ability to add multiple new files)
\ No newline at end of file
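The release note above pairs with the documentation added in the previous patch. As a minimal sketch of how a client might assemble the jsonData for the new /replaceFiles call, assuming ``jq`` and ``sha1sum`` are available (the file id, storage identifier, and mimetype below are illustrative placeholders in the style of the examples above, not values taken from the patches):

.. code-block:: bash

   export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
   export SERVER_URL=https://demo.dataverse.org
   export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV

   # Placeholders: the id of the file being replaced and the storage
   # identifier under which the replacement was uploaded out-of-band.
   FILE_TO_REPLACE_ID=10
   STORAGE_ID='s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42'
   LOCAL_FILE=file1.txt

   # Compute the SHA-1 fixity value and build a one-entry jsonData array;
   # a multi-file replace just appends more objects to the same array.
   SHA1=$(sha1sum "$LOCAL_FILE" | cut -d' ' -f1)
   JSON_DATA=$(jq -n --arg id "$FILE_TO_REPLACE_ID" --arg sid "$STORAGE_ID" \
       --arg name "$LOCAL_FILE" --arg sha "$SHA1" \
       '[{fileToReplaceId: ($id|tonumber), storageIdentifier: $sid, fileName: $name,
          mimeType: "text/plain", checksum: {"@type": "SHA-1", "@value": $sha}}]')

   curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA"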
= " + newFilename); - if (!hasError()) { - runAddFileByDataset(dataset, newFilename, newFileContentType, newStorageIdentifier, - null, optionalFileParams, true); - } + + runAddFileByDataset(dataset, newFilename, newFileContentType, newStorageIdentifier, null, + optionalFileParams, true); if (hasError()) { JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("storageIdentifier", newStorageIdentifier) @@ -2176,9 +2175,10 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { * @return */ - public Response replaceFiles(String jsonData, Dataset dataset, User authUser) { + public Response replaceFiles(String jsonData, Dataset ds, User authUser) { msgt("(replaceFilesInDataset) jsonData: " + jsonData.toString()); + this.dataset = ds; JsonArrayBuilder jarr = Json.createArrayBuilder(); JsonArray filesJson = null; @@ -2231,22 +2231,12 @@ public Response replaceFiles(String jsonData, Dataset dataset, User authUser) { } msgt("REPLACE! = " + newFilename); - if (!hasError()) { - if (forceReplace){ - runForceReplaceFile(fileToReplaceId, - newFilename, - newFileContentType, - newStorageIdentifier, - null, - optionalFileParams, true); - }else{ - runReplaceFile(fileToReplaceId, - newFilename, - newFileContentType, - newStorageIdentifier, - null, - optionalFileParams, true); - } + if (forceReplace) { + runForceReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, null, optionalFileParams, true); + } else { + runReplaceFile(fileToReplaceId, newFilename, newFileContentType, newStorageIdentifier, + null, optionalFileParams, true); } if (hasError()) { JsonObjectBuilder fileoutput = Json.createObjectBuilder() From 9d2fc0585c136c21109fb624002438d562246c75 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 13:45:07 -0400 Subject: [PATCH 245/608] relocate/rename entry for the /addFiles, /replaceFiles in native-api the title Add File Metadata has been misunderstood to mean the call can change the metadata for existing files which it can't. The entry was also in the File section when it is a dataset-level call --- doc/sphinx-guides/source/api/native-api.rst | 49 +++------------------ 1 file changed, 7 insertions(+), 42 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 93e1c36f179..e634bee37c9 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1511,6 +1511,13 @@ The fully expanded example above (without environment variables) looks like this curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' +Adding Files To a Dataset via Other Tools +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In some circumstances, it may be useful to move or copy files into Dataverse's storage manually or via external tools and then add then to a dataset (i.e. without involving Dataverse in the file transfer itself). +Two API calls are available for this use case to add files to a dataset or to replace files that were already in the dataset. 
+These calls were developed as part of Dataverse's direct upload mechanism and are detailed in :doc:`/developers/s3-direct-upload-api`. + Report the data (file) size of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2348,48 +2355,6 @@ The fully expanded example above (without environment variables) looks like this Note: The ``id`` returned in the json response is the id of the file metadata version. - -Adding File Metadata -~~~~~~~~~~~~~~~~~~~~ - -This API call requires a ``jsonString`` expressing the metadata of multiple files. It adds file metadata to the database table where the file has already been copied to the storage. - -The jsonData object includes values for: - -* "description" - A description of the file -* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset -* "storageIdentifier" - String -* "fileName" - String -* "mimeType" - String -* "fixity/checksum" either: - - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings - -.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below. - -A curl example using an ``PERSISTENT_ID`` - -* ``SERVER_URL`` - e.g. https://demo.dataverse.org -* ``API_TOKEN`` - API endpoints require an API token that can be passed as the X-Dataverse-key HTTP header. For more details, see the :doc:`auth` section. -* ``PERSISTENT_IDENTIFIER`` - Example: ``doi:10.5072/FK2/7U7YBV`` - -.. code-block:: bash - - export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV - export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ - {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" - -The fully expanded example above (without environment variables) looks like this: - -.. 
code-block:: bash
-
-   curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/:persistentId/addFiles?persistentId=doi:10.5072/FK2/7U7YBV -F jsonData='[{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}, {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]'
-
 Updating File Metadata
 ~~~~~~~~~~~~~~~~~~~~~~
From e26092a596ecf5b92b831e38b5459eec0371b4a5 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Mon, 3 Oct 2022 11:07:06 -0400
Subject: [PATCH 246/608] files may not have pids
---
 .../edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java | 4 +++-
 .../java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java   | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java
index 0189faf6598..50c8c4098a1 100644
--- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java
@@ -513,7 +513,9 @@ public JsonArray fileDownloads(String yyyymm, Dataverse d, boolean uniqueCounts)
         for (Object[] result : results) {
             JsonObjectBuilder job = Json.createObjectBuilder();
             job.add(MetricsUtil.ID, (int) result[0]);
-            job.add(MetricsUtil.PID, (String) result[1]);
+            if(result[1]!=null) {
+                job.add(MetricsUtil.PID, (String) result[1]);
+            }
             job.add(MetricsUtil.COUNT, (long) result[2]);
             jab.add(job);
         }
diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java
index 90b61bcb29c..72d8f5402bb 100644
--- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java
@@ -227,7 +227,9 @@ public static JsonArray timeSeriesByIDAndPIDToJson(List results) {
             JsonObjectBuilder job = Json.createObjectBuilder();
             job.add(MetricsUtil.DATE, date);
             job.add(ID, id);
-            job.add(PID, pids.get(id));
+            if(pids.get(id)!=null) {
+                job.add(PID, pids.get(id));
+            }
             job.add(COUNT, totals.get(id));
             jab.add(job);
         }
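With the change above, the ``pid`` key is simply omitted from the per-file JSON when a datafile has no PID, so downstream consumers should treat it as optional. A minimal sketch of a defensive consumer, assuming ``jq`` is available; the ``filedownloads`` endpoint path is an assumption here and may differ by installation and version:

.. code-block:: bash

   # Fetch per-file download counts as JSON and flatten to CSV, emitting an
   # empty field where a datafile has no PID (the key is omitted, not null).
   curl -s -H 'Accept:application/json' "https://demo.dataverse.org/api/info/metrics/filedownloads" \
       | jq -r '.data[] | [.id, (.pid // ""), .count] | @csv'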
From 0376a72f9a632e7256c1618acb7ab701d19442d1 Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Mon, 3 Oct 2022 11:17:59 -0400
Subject: [PATCH 247/608] handle missing PIDs in CSV
---
 src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
index 339de904f9e..39d1d332884 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
@@ -2098,7 +2098,9 @@ public static String jsonArrayOfObjectsToCSV(JsonArray jsonArray, String... head
             JsonObject jo = (JsonObject) jv;
             String[] values = new String[headers.length];
             for (int i = 0; i < headers.length; i++) {
-                values[i] = jo.get(headers[i]).toString();
+                if(jo.containsKey(headers[i])) {
+                    values[i] = jo.get(headers[i]).toString();
+                }
             }
             csvSB.append("\n").append(String.join(",", values));
         });
From d518f93dabbd0b3ab037b9344e4138d32f8a845f Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Mon, 3 Oct 2022 11:22:00 -0400
Subject: [PATCH 248/608] minor doc update
---
 doc/sphinx-guides/source/api/metrics.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/sphinx-guides/source/api/metrics.rst b/doc/sphinx-guides/source/api/metrics.rst
index 6a878d73a98..f1eb1f88c71 100755
--- a/doc/sphinx-guides/source/api/metrics.rst
+++ b/doc/sphinx-guides/source/api/metrics.rst
@@ -72,7 +72,7 @@ Return Formats

 There are a number of API calls that provide time series, information reported per item (e.g. per dataset, per file, by subject, by category, and by file Mimetype), or both (time series per item). Because these calls all report more than a single number, the API provides two optional formats for the return that can be selected by specifying an HTTP Accept Header for the desired format:

-* application/json - a JSON array of objects. For time-series, the objects include key/values for the ``date`` and ``count`` for that month. For per-item calls, the objects include the item (e.g. for a subject), or it's id/pid (for a dataset or datafile). For timeseries per-item, the objects also include a date. In all cases, the response is a single array.
+* application/json - a JSON array of objects. For time-series, the objects include key/values for the ``date`` and ``count`` for that month. For per-item calls, the objects include the item (e.g. for a subject), or its id/pid (for a dataset or datafile, which may or may not have a PID). For timeseries per-item, the objects also include a date. In all cases, the response is a single array.

   * Example: ``curl -H 'Accept:application/json' https://demo.dataverse.org/api/info/metrics/downloads/monthly``

@@ -120,7 +120,7 @@ Example: ``curl https://demo.dataverse.org/api/info/metrics/makeDataCount/viewsT
 Endpoint Table
 --------------

-The following table lists the available metrics endpoints (not including the Make Data Counts endpoints a single dataset which are part of the :doc:`/api/native-api`) along with additional notes about them.
+The following table lists the available metrics endpoints (not including the Make Data Counts endpoints for a single dataset, which are part of the :doc:`/api/native-api`) along with additional notes about them.

 ..
csv-table:: Metrics Endpoints From 531fe8eef706bf0026b1cd29dc71d6ec7af4431c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 14:37:39 -0400 Subject: [PATCH 249/608] add test debug logging --- .../engine/command/impl/DRSSubmitToArchiveCommandTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java index 9cc9fae67ba..a0e79268e3d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -110,7 +110,8 @@ public void createJWT() throws CommandException { DecodedJWT jwt = JWT.decode(token1); System.out.println(jwt.getPayload()); } catch (Exception e) { - System.out.println(e.getLocalizedMessage()); + System.out.println(e.getClass() + e.getLocalizedMessage()); + e.printStackTrace(); //Any exception is a failure, otherwise decoding worked. Assert.fail(e.getLocalizedMessage()); } From 2d451dcb7de34772fdc536369abd12550559e81c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 14:41:24 -0400 Subject: [PATCH 250/608] handle test case with no BrandingUtil --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 89666f02db2..f23033f09fa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -305,7 +305,10 @@ public static String createJWTString(Algorithm algorithmRSA, String installation String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); logger.fine("Canonical body: " + canonicalBody); String digest = DigestUtils.sha256Hex(canonicalBody); - return JWT.create().withIssuer(BrandingUtil.getInstallationBrandName()).withIssuedAt(Date.from(Instant.now())) + if(installationBrandName==null) { + installationBrandName = BrandingUtil.getInstallationBrandName(); + } + return JWT.create().withIssuer(installationBrandName).withIssuedAt(Date.from(Instant.now())) .withExpiresAt(Date.from(Instant.now().plusSeconds(60 * expirationInMinutes))) .withKeyId("defaultDataverse").withClaim("bodySHA256Hash", digest).sign(algorithmRSA); } From 63c175d2079410c29321f2f70976503b1b7e7c0c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 14:47:42 -0400 Subject: [PATCH 251/608] add sleepForReindex call --- .../edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 7579ab265fd..3d81b51df56 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -226,9 +226,8 @@ public void testOaiFunctionality() throws InterruptedException { // created and published: // - however, publish command is executed asynchronously, i.e. it may // still be running after we received the OK from the publish API. 
- // So let's give it a couple of extra seconds to finish, to make sure - // the dataset is published, exported and indexed - because the OAI - // set create API requires all of the above. + // So let's wait for it to finish. + UtilIT.sleepForReindex(datasetPersistentId, adminUserAPIKey, 10); Thread.sleep(3000L); String setName = identifier; String setQuery = "dsPersistentId:" + identifier; From d3bc18015f7616f124d4f078d9aed67877cbd3b1 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 14:48:59 -0400 Subject: [PATCH 252/608] and remove old sleep --- .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 3d81b51df56..5f0a6cec340 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -228,7 +228,7 @@ public void testOaiFunctionality() throws InterruptedException { // still be running after we received the OK from the publish API. // So let's wait for it to finish. UtilIT.sleepForReindex(datasetPersistentId, adminUserAPIKey, 10); - Thread.sleep(3000L); + String setName = identifier; String setQuery = "dsPersistentId:" + identifier; String apiPath = String.format("/api/harvest/server/oaisets/%s", setName); From ceb3968167de4e78d9b2ed1a48665ffdf8048ae1 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 14:56:09 -0400 Subject: [PATCH 253/608] change sleep to be in seconds --- src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 7107ee783d7..c0a0a18bf63 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2429,16 +2429,18 @@ static Boolean sleepForLock(String idOrPersistentId, String lockType, String api } - static boolean sleepForReindex(String idOrPersistentId, String apiToken, int duration) { + static boolean sleepForReindex(String idOrPersistentId, String apiToken, int durationInSeconds) { int i = 0; Response timestampResponse; + int sleepStep=200; + int repeats = durationInSeconds*1000/sleepStep; do { timestampResponse = UtilIT.getDatasetTimestamps(idOrPersistentId, apiToken); try { - Thread.sleep(200); + Thread.sleep(sleepStep); i++; - if (i > duration) { + if (i > repeats) { break; } } catch (InterruptedException ex) { @@ -2446,7 +2448,7 @@ static boolean sleepForReindex(String idOrPersistentId, String apiToken, int dur } } while (timestampResponse.body().jsonPath().getBoolean("data.hasStaleIndex")); - return i <= duration; + return i <= repeats; } From 8fb50a861dd463cc3fb16d854b70b37ee1b95a35 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 16:52:05 -0400 Subject: [PATCH 254/608] change other use of sleepForReindex --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 529af5f746c..6e42e478863 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3020,7 +3020,7 @@ public void 
testArchivalStatusAPI() throws IOException { } assertEquals(OK.getStatusCode(), status); - if (!UtilIT.sleepForReindex(datasetPersistentId, apiToken, 3000)) { + if (!UtilIT.sleepForReindex(datasetPersistentId, apiToken, 3)) { logger.info("Still indexing after 3 seconds"); } From a8258ea8b9362eaf3a897de91ba6dd31fe0f6d56 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 14:47:42 -0400 Subject: [PATCH 255/608] add sleepForReindex call --- .../edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 7579ab265fd..3d81b51df56 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -226,9 +226,8 @@ public void testOaiFunctionality() throws InterruptedException { // created and published: // - however, publish command is executed asynchronously, i.e. it may // still be running after we received the OK from the publish API. - // So let's give it a couple of extra seconds to finish, to make sure - // the dataset is published, exported and indexed - because the OAI - // set create API requires all of the above. + // So let's wait for it to finish. + UtilIT.sleepForReindex(datasetPersistentId, adminUserAPIKey, 10); Thread.sleep(3000L); String setName = identifier; String setQuery = "dsPersistentId:" + identifier; From 48d0f4cbbd281af0a5f258504ef03ef7a7596748 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 14:48:59 -0400 Subject: [PATCH 256/608] and remove old sleep --- .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 3d81b51df56..5f0a6cec340 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -228,7 +228,7 @@ public void testOaiFunctionality() throws InterruptedException { // still be running after we received the OK from the publish API. // So let's wait for it to finish. 
UtilIT.sleepForReindex(datasetPersistentId, adminUserAPIKey, 10); - Thread.sleep(3000L); + String setName = identifier; String setQuery = "dsPersistentId:" + identifier; String apiPath = String.format("/api/harvest/server/oaisets/%s", setName); From 463b033c6951ecf3a828c515fb469cba4a42b418 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 14:56:09 -0400 Subject: [PATCH 257/608] change sleep to be in seconds --- src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 7107ee783d7..c0a0a18bf63 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2429,16 +2429,18 @@ static Boolean sleepForLock(String idOrPersistentId, String lockType, String api } - static boolean sleepForReindex(String idOrPersistentId, String apiToken, int duration) { + static boolean sleepForReindex(String idOrPersistentId, String apiToken, int durationInSeconds) { int i = 0; Response timestampResponse; + int sleepStep=200; + int repeats = durationInSeconds*1000/sleepStep; do { timestampResponse = UtilIT.getDatasetTimestamps(idOrPersistentId, apiToken); try { - Thread.sleep(200); + Thread.sleep(sleepStep); i++; - if (i > duration) { + if (i > repeats) { break; } } catch (InterruptedException ex) { @@ -2446,7 +2448,7 @@ static boolean sleepForReindex(String idOrPersistentId, String apiToken, int dur } } while (timestampResponse.body().jsonPath().getBoolean("data.hasStaleIndex")); - return i <= duration; + return i <= repeats; } From 85a3306e6ab3d20aa206a9669926cbc462b5a76c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 16:56:14 -0400 Subject: [PATCH 258/608] update test to use seconds --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 529af5f746c..6e42e478863 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3020,7 +3020,7 @@ public void testArchivalStatusAPI() throws IOException { } assertEquals(OK.getStatusCode(), status); - if (!UtilIT.sleepForReindex(datasetPersistentId, apiToken, 3000)) { + if (!UtilIT.sleepForReindex(datasetPersistentId, apiToken, 3)) { logger.info("Still indexing after 3 seconds"); } From 735f8a121334ffba6693b3153d50ce7c1bf2cdb9 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 17:01:37 -0400 Subject: [PATCH 259/608] cleanup, slower steps, add logging --- .../java/edu/harvard/iq/dataverse/api/UtilIT.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index c0a0a18bf63..29b25a07983 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2432,22 +2432,19 @@ static Boolean sleepForLock(String idOrPersistentId, String lockType, String api static boolean sleepForReindex(String idOrPersistentId, String apiToken, int durationInSeconds) { int i = 0; Response timestampResponse; - int sleepStep=200; - int repeats = durationInSeconds*1000/sleepStep; + int sleepStep=500; + int repeats = 
durationInSeconds*(1000/sleepStep); do { timestampResponse = UtilIT.getDatasetTimestamps(idOrPersistentId, apiToken); try { Thread.sleep(sleepStep); i++; - if (i > repeats) { - break; - } } catch (InterruptedException ex) { Logger.getLogger(UtilIT.class.getName()).log(Level.SEVERE, null, ex); } - } while (timestampResponse.body().jsonPath().getBoolean("data.hasStaleIndex")); - + } while ((i <= repeats) && timestampResponse.body().jsonPath().getBoolean("data.hasStaleIndex")); + Logger.getLogger(UtilIT.class.getName()).info("Waited " + (i * (sleepStep/1000)) + " seconds"); return i <= repeats; } From 1f0c7e7e1b4ec5404eb26c7c0f73b4aae1698dfc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 18:18:15 -0400 Subject: [PATCH 260/608] more logging, handle Interrupted better --- .../java/edu/harvard/iq/dataverse/api/UtilIT.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 29b25a07983..ce7aefb3820 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2432,19 +2432,24 @@ static Boolean sleepForLock(String idOrPersistentId, String lockType, String api static boolean sleepForReindex(String idOrPersistentId, String apiToken, int durationInSeconds) { int i = 0; Response timestampResponse; - int sleepStep=500; - int repeats = durationInSeconds*(1000/sleepStep); + int sleepStep = 500; + int repeats = durationInSeconds * (1000 / sleepStep); + boolean stale=true; do { timestampResponse = UtilIT.getDatasetTimestamps(idOrPersistentId, apiToken); + String hasStaleIndex = timestampResponse.body().jsonPath().getString("data.hasStaleIndex"); + System.out.println(hasStaleIndex); + stale = Boolean.parseBoolean(hasStaleIndex); try { Thread.sleep(sleepStep); i++; } catch (InterruptedException ex) { Logger.getLogger(UtilIT.class.getName()).log(Level.SEVERE, null, ex); + i = repeats + 1; } - } while ((i <= repeats) && timestampResponse.body().jsonPath().getBoolean("data.hasStaleIndex")); - Logger.getLogger(UtilIT.class.getName()).info("Waited " + (i * (sleepStep/1000)) + " seconds"); + } while ((i <= repeats) && stale); + System.out.println("Waited " + (i * (sleepStep / 1000)) + " seconds"); return i <= repeats; } From 0861ea84dc56725cd1e8082b8ea2d9aae14b05d3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 18:18:27 -0400 Subject: [PATCH 261/608] try longer wait for export --- .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 5f0a6cec340..290cde7b3e6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -246,7 +246,7 @@ public void testOaiFunctionality() throws InterruptedException { Response exportSetResponse = UtilIT.exportOaiSet(setName); assertEquals(200, exportSetResponse.getStatusCode()); //SEK 09/04/2019 resonable wait time for export OAI? 
#6128 - Thread.sleep(5000L); + Thread.sleep(10000L); Response getSet = given() .get(apiPath); From b59f4835074518fc8374e4f86b4a8f36dc3ccb58 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 6 Oct 2022 11:40:51 +0200 Subject: [PATCH 262/608] dataset files cleanup --- .../harvard/iq/dataverse/api/Datasets.java | 60 +++++++++++++++++ .../iq/dataverse/dataaccess/FileAccessIO.java | 41 ++++++++++++ .../dataverse/dataaccess/InputStreamIO.java | 12 +++- .../dataaccess/RemoteOverlayAccessIO.java | 9 +++ .../iq/dataverse/dataaccess/S3AccessIO.java | 65 ++++++++++++++++++- .../iq/dataverse/dataaccess/StorageIO.java | 4 ++ .../dataverse/dataaccess/SwiftAccessIO.java | 57 ++++++++++++++-- 7 files changed, 241 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index aff543e643c..0cdb2b3a73f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -66,6 +66,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand; @@ -2502,6 +2503,65 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } // end: addFileToDataset + /** + * Clean storage of a Dataset + * + * @param idSupplied + * @return + */ + @GET + @Path("{id}/cleanStorage") + public Response cleanStorage(@PathParam("id") String idSupplied) { + // get user and dataset + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + Dataset dataset; + try { + dataset = findDatasetOrDie(idSupplied); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + // check permissions + if (!permissionSvc.permissionsFor(createDataverseRequest(authUser), dataset).contains(Permission.EditDataset)) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "Access denied!"); + } + + List deleted = new ArrayList<>(); + Set files = new HashSet(); + try { + for (DatasetVersion dv : dataset.getVersions()) { + for (FileMetadata f : dv.getFileMetadatas()) { + String storageIdentifier = f.getDataFile().getStorageIdentifier(); + String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3); + String[] locationParts = location.split(":", 3);//separate bucket, swift container, etc. from fileName + files.add(locationParts[locationParts.length-1]); + } + } + StorageIO datasetIO = DataAccess.getStorageIO(dataset); + List allDatasetFiles = datasetIO.listAllFiles(); + for (String f : allDatasetFiles) { + if (!files.contains(f)) { + datasetIO.deleteFile(f); + deleted.add(f); + } + } + } catch (IOException ex) { + logger.log(Level.SEVERE, null, ex); + return error(Response.Status.INTERNAL_SERVER_ERROR, "IOException! Serious Error! 
See administrator!"); + } + + return ok("Found: " + files.stream().collect(Collectors.joining(", ")) + "\n" + "Deleted: " + deleted.stream().collect(Collectors.joining(", "))); + + } + private void msg(String m) { //System.out.println(m); logger.fine(m); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index d5f00b9868f..2bb3abf03a6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -683,4 +683,45 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { } return true; } + + public List listAllFiles() throws IOException { + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This FileAccessIO object hasn't been properly initialized."); + } + + Path datasetDirectoryPath = Paths.get(dataset.getAuthorityForFileStorage(), dataset.getIdentifierForFileStorage()); + if (datasetDirectoryPath == null) { + throw new IOException("Could not determine the filesystem directory of the dataset."); + } + + DirectoryStream dirStream = Files.newDirectoryStream(Paths.get(this.getFilesRootDirectory(), datasetDirectoryPath.toString())); + + List res = new ArrayList<>(); + if (dirStream != null) { + for (Path filePath : dirStream) { + res.add(filePath.getFileName().toString()); + } + dirStream.close(); + } + + return res; + } + + @Override + public void deleteFile(String fileName) throws IOException { + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This FileAccessIO object hasn't been properly initialized."); + } + + Path datasetDirectoryPath = Paths.get(dataset.getAuthorityForFileStorage(), dataset.getIdentifierForFileStorage()); + if (datasetDirectoryPath == null) { + throw new IOException("Could not determine the filesystem directory of the dataset."); + } + + Path p = Paths.get(this.getFilesRootDirectory(), datasetDirectoryPath.toString(), fileName); + Files.delete(p); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index c9796d24b27..1235b386fe9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -159,5 +159,15 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); } - + @Override + public List listAllFiles() throws IOException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: tthis method is not supported in this DataAccess driver."); + } + + + @Override + public void deleteFile(String fileName) throws IOException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index c8e42349318..b7fb4c86c7c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -631,4 +631,13 @@ public static String getBaseStoreIdFor(String driverId) { return 
System.getProperty("dataverse.files." + driverId + ".base-store"); } + @Override + public List listAllFiles() throws IOException { + return baseStore.listAllFiles(); + } + + @Override + public void deleteFile(String fileName) throws IOException { + baseStore.deleteFile(fileName); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 3c9cef04980..3796d7f0ce9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1307,4 +1307,67 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { } -} + + @Override + public List listAllFiles() throws IOException { + if (!this.canWrite()) { + open(); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This S3AccessIO object hasn't been properly initialized."); + } + String prefix = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + + List ret = new ArrayList<>(); + ListObjectsRequest req = new ListObjectsRequest().withBucketName(bucketName).withPrefix(prefix); + ObjectListing storedFilesList = null; + try { + storedFilesList = s3.listObjects(req); + } catch (SdkClientException sce) { + throw new IOException ("S3 listObjects: failed to get a listing for " + prefix); + } + if (storedFilesList == null) { + return ret; + } + List storedFilesSummary = storedFilesList.getObjectSummaries(); + try { + while (storedFilesList.isTruncated()) { + logger.fine("S3 listObjects: going to next page of list"); + storedFilesList = s3.listNextBatchOfObjects(storedFilesList); + if (storedFilesList != null) { + storedFilesSummary.addAll(storedFilesList.getObjectSummaries()); + } + } + } catch (AmazonClientException ase) { + //logger.warning("Caught an AmazonServiceException in S3AccessIO.listObjects(): " + ase.getMessage()); + throw new IOException("S3AccessIO: Failed to get objects for listing."); + } + + for (S3ObjectSummary item : storedFilesSummary) { + String fileName = item.getKey().substring(prefix.length()); + ret.add(fileName); + } + return ret; + } + + @Override + public void deleteFile(String fileName) throws IOException { + if (!this.canWrite()) { + open(); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This S3AccessIO object hasn't been properly initialized."); + } + String prefix = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + + try { + DeleteObjectRequest dor = new DeleteObjectRequest(bucketName, prefix + fileName); + s3.deleteObject(dor); + } catch (AmazonClientException ase) { + logger.warning("S3AccessIO: Unable to delete object " + ase.getMessage()); + } + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 90e4a54dbe8..0e42a84795c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -622,4 +622,8 @@ protected static boolean usesStandardNamePattern(String identifier) { return m.find(); } + public abstract List listAllFiles() throws IOException; + + public abstract void deleteFile(String fileName) throws IOException; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index b1725b040a3..5a376cb8d91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -864,13 +864,16 @@ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException } } + private String getSwiftContainerName(Dataset dataset) { + String authorityNoSlashes = dataset.getAuthorityForFileStorage().replace("/", swiftFolderPathSeparator); + return dataset.getProtocolForFileStorage() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + + swiftFolderPathSeparator + dataset.getIdentifierForFileStorage(); + } + @Override public String getSwiftContainerName() { if (dvObject instanceof DataFile) { - String authorityNoSlashes = this.getDataFile().getOwner().getAuthorityForFileStorage().replace("/", swiftFolderPathSeparator); - return this.getDataFile().getOwner().getProtocolForFileStorage() + swiftFolderPathSeparator - + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + - swiftFolderPathSeparator + this.getDataFile().getOwner().getIdentifierForFileStorage(); + return getSwiftContainerName(this.getDataFile().getOwner()); } return null; } @@ -893,5 +896,49 @@ public static String calculateRFC2104HMAC(String data, String key) mac.init(signingKey); return toHexString(mac.doFinal(data.getBytes())); } - + + @Override + public List listAllFiles() throws IOException { + if (!this.canWrite()) { + open(DataAccessOption.WRITE_ACCESS); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This SwiftAccessIO object hasn't been properly initialized."); + } + String prefix = getSwiftContainerName(dataset) + swiftFolderPathSeparator; + + Collection items; + String lastItemName = null; + List ret = new ArrayList<>(); + + while ((items = this.swiftContainer.list(prefix, lastItemName, LIST_PAGE_LIMIT)) != null && items.size() > 0) { + for (StoredObject item : items) { + lastItemName = item.getName().substring(prefix.length()); + ret.add(lastItemName); + } + } + + return ret; + } + + @Override + public void deleteFile(String fileName) throws IOException { + if (!this.canWrite()) { + open(DataAccessOption.WRITE_ACCESS); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This SwiftAccessIO object hasn't been properly initialized."); + } + String prefix = getSwiftContainerName(dataset) + swiftFolderPathSeparator; + + StoredObject fileObject = this.swiftContainer.getObject(prefix + fileName); + + if (!fileObject.exists()) { + throw new FileNotFoundException("SwiftAccessIO/Direct Access: " + fileName + " does not exist"); + } + + fileObject.delete(); + } } From 31247d85e59feaf3aaf06d0a62a9d101dcff43ad Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 6 Oct 2022 12:15:31 -0400 Subject: [PATCH 263/608] tweak timing --- .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 290cde7b3e6..ca5654fa49f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -227,6 +227,7 @@ public void testOaiFunctionality() throws InterruptedException { // - however, publish command is executed asynchronously, i.e. 
it may // still be running after we received the OK from the publish API. // So let's wait for it to finish. + Thread.sleep(1000L); UtilIT.sleepForReindex(datasetPersistentId, adminUserAPIKey, 10); String setName = identifier; @@ -246,7 +247,7 @@ public void testOaiFunctionality() throws InterruptedException { Response exportSetResponse = UtilIT.exportOaiSet(setName); assertEquals(200, exportSetResponse.getStatusCode()); //SEK 09/04/2019 resonable wait time for export OAI? #6128 - Thread.sleep(10000L); + Thread.sleep(5000L); Response getSet = given() .get(apiPath); From 0b0c3b937e4eeef584a0313a76d01bc51fe40ea3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 6 Oct 2022 14:52:35 -0400 Subject: [PATCH 264/608] add multivalued in schema --- conf/solr/8.11.1/schema.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index 381d72d2756..063ffa9bd29 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -679,9 +679,9 @@ - + - + From 547b4a410edcb8b3f64290cc3c9a0cf7f7fdb5f8 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 6 Oct 2022 15:00:53 -0400 Subject: [PATCH 265/608] more testing of timing and sleepForReindex --- .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 4 ++-- src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index ca5654fa49f..7498c71bfc5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -227,9 +227,9 @@ public void testOaiFunctionality() throws InterruptedException { // - however, publish command is executed asynchronously, i.e. it may // still be running after we received the OK from the publish API. // So let's wait for it to finish. 
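A note on the listAllFiles()/deleteFile(String) pair added to StorageIO and its FileAccessIO, S3AccessIO, and SwiftAccessIO subclasses a few patches above: together they give callers one uniform way to enumerate and remove objects under a dataset's authority/identifier storage prefix. A minimal sketch of a caller, assuming the List<String> return type the generics-stripped diff suggests; findOrphans and registeredNames are hypothetical illustrations, not code from these patches:

import edu.harvard.iq.dataverse.dataaccess.StorageIO;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

public class StorageAuditSketch {
    // Lists files present under the dataset's storage prefix that no
    // registered DataFile accounts for; each returned name could then be
    // passed to storageIo.deleteFile(name).
    public static List<String> findOrphans(StorageIO<?> storageIo, Set<String> registeredNames) throws IOException {
        List<String> orphans = new ArrayList<>();
        for (String name : storageIo.listAllFiles()) {
            if (!registeredNames.contains(name)) {
                orphans.add(name);
            }
        }
        return orphans;
    }
}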
- Thread.sleep(1000L); + Thread.sleep(200L); UtilIT.sleepForReindex(datasetPersistentId, adminUserAPIKey, 10); - + Thread.sleep(5000L); String setName = identifier; String setQuery = "dsPersistentId:" + identifier; String apiPath = String.format("/api/harvest/server/oaisets/%s", setName); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index ce7aefb3820..4ea2cc5f2d2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2437,6 +2437,7 @@ static boolean sleepForReindex(String idOrPersistentId, String apiToken, int dur boolean stale=true; do { timestampResponse = UtilIT.getDatasetTimestamps(idOrPersistentId, apiToken); + System.out.println(timestampResponse.body().asString()); String hasStaleIndex = timestampResponse.body().jsonPath().getString("data.hasStaleIndex"); System.out.println(hasStaleIndex); stale = Boolean.parseBoolean(hasStaleIndex); From cf9b4aee9c5914ff6046fabd188466eb7d3bba1f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 6 Oct 2022 16:19:56 -0400 Subject: [PATCH 266/608] case matters --- conf/solr/8.11.1/schema.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index 063ffa9bd29..e9fbb35403e 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -679,9 +679,9 @@ - + - + From f542925e24e301b4c9c5336fbfc8f5bbc01c6f79 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 6 Oct 2022 16:43:24 -0400 Subject: [PATCH 267/608] north < south latitude is an error we may want to test for that, but not here. --- doc/sphinx-guides/source/_static/api/ddi_dataset.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml index 05eaadc3458..850e6e72ba2 100644 --- a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml +++ b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml @@ -88,8 +88,8 @@ 10 20 - 30 - 40 + 40 + 30 80 From 9cae388d96a9e794b93cb83375f48a6963721e3d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 7 Oct 2022 09:09:08 -0400 Subject: [PATCH 268/608] remove sleep so error logging shows --- .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 7498c71bfc5..a9043d49032 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -229,7 +229,7 @@ public void testOaiFunctionality() throws InterruptedException { // So let's wait for it to finish. 
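The multiValued schema change in PATCH 264 above matters because the spatial indexing code later in this series calls addField once per geographic bounding box, and Solr rejects a second value on a field that is not declared multiValued. A small SolrJ illustration; the document id is invented, and the ENVELOPE arguments follow the W,E,N,S order the indexing code uses:

import org.apache.solr.common.SolrInputDocument;

public class MultiValuedFieldSketch {
    public static SolrInputDocument docWithTwoBoundingBoxes() {
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", "dataset_42");
        // Two values for the same field are legal only when the schema
        // declares it multiValued="true"; otherwise the update fails.
        doc.addField("solr_srpt", "ENVELOPE(10,20,40,30)");
        doc.addField("solr_srpt", "ENVELOPE(50,60,80,70)");
        return doc;
    }
}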
Thread.sleep(200L); UtilIT.sleepForReindex(datasetPersistentId, adminUserAPIKey, 10); - Thread.sleep(5000L); + String setName = identifier; String setQuery = "dsPersistentId:" + identifier; String apiPath = String.format("/api/harvest/server/oaisets/%s", setName); From 4f9434e9e09689ea8afcbbf7fb36656b7b77e2ae Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 12 Oct 2022 15:34:45 -0400 Subject: [PATCH 269/608] fix another non-physical box --- doc/sphinx-guides/source/_static/api/ddi_dataset.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml index 850e6e72ba2..014ebb8c581 100644 --- a/doc/sphinx-guides/source/_static/api/ddi_dataset.xml +++ b/doc/sphinx-guides/source/_static/api/ddi_dataset.xml @@ -92,8 +92,8 @@ 30 - 80 - 70 + 70 + 80 60 50 From 1db095fa8cbb947beddc1792761ed6bebf099db8 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 12 Oct 2022 16:58:48 -0400 Subject: [PATCH 270/608] handle multiples - make bbox a single surrounding box --- conf/solr/8.11.1/schema.xml | 3 --- .../iq/dataverse/search/IndexServiceBean.java | 22 +++++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index e9fbb35403e..10f1d8f1f4f 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -647,9 +647,6 @@ - - - diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 63412c59b56..766d2a05e6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -38,6 +38,7 @@ import java.io.IOException; import java.io.InputStream; import java.sql.Timestamp; +import java.text.NumberFormat; import java.text.SimpleDateFormat; import java.time.LocalDate; import java.util.ArrayList; @@ -951,6 +952,10 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Float.parseFloat(westLon)) { + minWestLon=westLon; + } + if(maxEastLon==null || Float.parseFloat(maxEastLon) < Float.parseFloat(eastLon)) { + maxEastLon=eastLon; + } + if(minSouthLat==null || Float.parseFloat(minSouthLat) > Float.parseFloat(southLat)) { + minSouthLat=southLat; + } + if(maxNorthLat==null || Float.parseFloat(maxNorthLat) < Float.parseFloat(northLat)) { + maxNorthLat=northLat; + } //W, E, N, S solrInputDocument.addField("solr_srpt", "ENVELOPE(" + westLon + "," + eastLon + "," + northLat + "," + southLat + ")"); } + //Only one bbox per dataset + //W, E, N, S + solrInputDocument.addField("solr_bbox", "ENVELOPE(" + minWestLon + "," + maxEastLon + "," + maxNorthLat + "," + minSouthLat + ")"); + } } } From 202438af792eef1ff5db5835b5ca350eea366c32 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 12 Oct 2022 21:50:21 -0400 Subject: [PATCH 271/608] typo --- .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 766d2a05e6d..05947ee1224 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1007,7 +1007,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Thu, 13 Oct 2022 
13:31:39 -0400 Subject: [PATCH 272/608] wrong scope --- .../edu/harvard/iq/dataverse/search/IndexServiceBean.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 05947ee1224..6c4fb3f1332 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1005,11 +1005,10 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Fri, 14 Oct 2022 11:13:59 +0800 Subject: [PATCH 273/608] encoding change. From de6be210c10c491eeba8fb5dce9d74a3cff06926 Mon Sep 17 00:00:00 2001 From: xflv Date: Fri, 14 Oct 2022 11:27:52 +0800 Subject: [PATCH 274/608] update encoding change. --- src/main/java/propertyFiles/citation.properties | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index b382f8a5a1e..ef8b44d7114 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -346,7 +346,7 @@ controlledvocabulary.language.galician=Galician controlledvocabulary.language.georgian=Georgian controlledvocabulary.language.german=German controlledvocabulary.language.greek_(modern)=Greek (modern) -controlledvocabulary.language.guarani=Guaraní +controlledvocabulary.language.guarani=Guaraní controlledvocabulary.language.gujarati=Gujarati controlledvocabulary.language.haitian,_haitian_creole=Haitian, Haitian Creole controlledvocabulary.language.hausa=Hausa @@ -406,7 +406,7 @@ controlledvocabulary.language.navajo,_navaho=Navajo, Navaho controlledvocabulary.language.northern_ndebele=Northern Ndebele controlledvocabulary.language.nepali=Nepali controlledvocabulary.language.ndonga=Ndonga -controlledvocabulary.language.norwegian_bokmal=Norwegian BokmÃ¥l +controlledvocabulary.language.norwegian_bokmal=Norwegian Bokmål controlledvocabulary.language.norwegian_nynorsk=Norwegian Nynorsk controlledvocabulary.language.norwegian=Norwegian controlledvocabulary.language.nuosu=Nuosu @@ -468,7 +468,7 @@ controlledvocabulary.language.urdu=Urdu controlledvocabulary.language.uzbek=Uzbek controlledvocabulary.language.venda=Venda controlledvocabulary.language.vietnamese=Vietnamese -controlledvocabulary.language.volapuk=Volapük +controlledvocabulary.language.volapuk=Volapük controlledvocabulary.language.walloon=Walloon controlledvocabulary.language.welsh=Welsh controlledvocabulary.language.wolof=Wolof @@ -478,4 +478,4 @@ controlledvocabulary.language.yiddish=Yiddish controlledvocabulary.language.yoruba=Yoruba controlledvocabulary.language.zhuang,_chuang=Zhuang, Chuang controlledvocabulary.language.zulu=Zulu -controlledvocabulary.language.not_applicable=Not applicable +controlledvocabulary.language.not_applicable=Not applicable \ No newline at end of file From c2f9c58e9fdd623a711d652d434de76466600f7e Mon Sep 17 00:00:00 2001 From: chenganj Date: Fri, 14 Oct 2022 10:48:41 -0400 Subject: [PATCH 275/608] license name translation --- .../iq/dataverse/dataset/DatasetUtil.java | 31 +++++++++++++------ .../java/propertyFiles/License.properties | 4 ++- src/main/webapp/dataset-license-terms.xhtml | 6 ++-- .../webapp/datasetLicenseInfoFragment.xhtml | 6 ++-- 4 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java 
b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 2db20377169..31e45aebf18 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -575,27 +575,38 @@ public static String getLicenseDescription(DatasetVersion dsv) { License license = DatasetUtil.getLicense(dsv); if (license != null) { - return getLocalizedLicenseDescription(license.getName()) ; + return getLocalizedLicense(license.getName(),"description") ; } else { return BundleUtil.getStringFromBundle("license.custom.description"); } } - public static String getLocalizedLicenseDescription(String licenseName) { - String key = "license." + licenseName.toLowerCase().replace(" ","_") + ".description"; - if (key != null) { + public static String getLocalizedLicense(String licenseName,String keyPart) { + String key = "license." + licenseName.toLowerCase().replace(" ", "_") + "." + keyPart; + + String second_key = ""; + if (keyPart == "description") + { + second_key = "license.custom.description"; + } + else + { + second_key = "license.custom"; + } + + if (key != null) { try { - String _description = BundleUtil.getStringFromPropertyFile(key, "License"); - if (_description == null) { - return BundleUtil.getStringFromBundle("license.custom.description"); + String propertyValue = BundleUtil.getStringFromPropertyFile(key, "License"); + if (propertyValue == null) { + return BundleUtil.getStringFromBundle(second_key); } else { - return _description; + return propertyValue; } } catch (MissingResourceException mre) { - return BundleUtil.getStringFromBundle("license.custom.description"); + return BundleUtil.getStringFromBundle(second_key); } } else { - return BundleUtil.getStringFromBundle("license.custom.description"); + return BundleUtil.getStringFromBundle(second_key); } } diff --git a/src/main/java/propertyFiles/License.properties b/src/main/java/propertyFiles/License.properties index f6def616a04..2347fed9db6 100644 --- a/src/main/java/propertyFiles/License.properties +++ b/src/main/java/propertyFiles/License.properties @@ -1,2 +1,4 @@ license.cc0_1.0.description=Creative Commons CC0 1.0 Universal Public Domain Dedication. -license.cc_by_4.0.description=Creative Commons Attribution 4.0 International License. \ No newline at end of file +license.cc_by_4.0.description=Creative Commons Attribution 4.0 International License. +license.cc0_1.0.name=CC0 1.0 +license.cc_by_4.0.name=CC-BY 4.0 diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index 3669d199283..429dee9b14a 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -46,7 +46,7 @@

+ var="license" itemLabel="#{DatasetUtil:getLocalizedLicense(license.name, 'name')}" itemValue="#{license}"/> @@ -55,8 +55,8 @@

- - #{termsOfUseAndAccess.license.name} + + #{termsOfUseAndAccess.license.name}
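The getLocalizedLicense refactor in PATCH 275 derives a bundle key from the license name and falls back to the generic license.custom entries when no localized value exists. A compact sketch of just the key scheme, with a plain Map standing in for the real BundleUtil/License.properties lookup:

import java.util.Map;

public class LicenseKeySketch {
    // Mirrors the key derivation in DatasetUtil.getLocalizedLicense:
    // "license." + name lowercased, spaces replaced by underscores, + "." + keyPart
    static String bundleKey(String licenseName, String keyPart) {
        return "license." + licenseName.toLowerCase().replace(" ", "_") + "." + keyPart;
    }

    public static void main(String[] args) {
        // Stand-in for the License.properties entries added in the patch:
        Map<String, String> bundle = Map.of(
                "license.cc0_1.0.name", "CC0 1.0",
                "license.cc_by_4.0.name", "CC-BY 4.0");
        String key = bundleKey("CC0 1.0", "name"); // -> license.cc0_1.0.name
        // Fall back the way the patch does when no localized value exists:
        System.out.println(bundle.getOrDefault(key, "(custom license terms)"));
    }
}

One caveat in the patched Java itself: keyPart == "description" compares string references rather than values; it happens to behave at the literal call sites, but "description".equals(keyPart) would be the robust form.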

diff --git a/src/main/webapp/datasetLicenseInfoFragment.xhtml b/src/main/webapp/datasetLicenseInfoFragment.xhtml index e5d10c745dd..797d20b8a25 100644 --- a/src/main/webapp/datasetLicenseInfoFragment.xhtml +++ b/src/main/webapp/datasetLicenseInfoFragment.xhtml @@ -30,12 +30,12 @@ xmlns:jsf="http://xmlns.jcp.org/jsf">
+ jsf:rendered="#{!empty DatasetUtil:getLocalizedLicense(DatasetPage.workingVersion.termsOfUseAndAccess.license.name,'description')} }">
- +
@@ -121,4 +121,4 @@ xmlns:jsf="http://xmlns.jcp.org/jsf"> - \ No newline at end of file + From c47d2aeaff51967b79a641c3e246640541333c40 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 10 Oct 2022 14:50:21 -0400 Subject: [PATCH 276/608] possible fix for curate issue when a file is the dataset thumb --- .../impl/CuratePublishedDatasetVersionCommand.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java index 772b6205b02..4e86f5c60dd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java @@ -99,6 +99,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { logger.severe("Draft version of dataset: " + tempDataset.getId() + " has: " + newFileCount + " while last published version has " + pubFileCount); throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasetversion.update.failure"), this); } + Long thumbId = null; + if(tempDataset.getThumbnailFile()!=null) { + thumbId = tempDataset.getThumbnailFile().getId(); + }; for (FileMetadata publishedFmd : pubFmds) { DataFile dataFile = publishedFmd.getDataFile(); FileMetadata draftFmd = dataFile.getLatestFileMetadata(); @@ -136,6 +140,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { for (DataFileCategory cat : tempDataset.getCategories()) { cat.getFileMetadatas().remove(draftFmd); } + //And any thumbnail reference + if(publishedFmd.getDataFile().getId()==thumbId) { + tempDataset.setThumbnailFile(publishedFmd.getDataFile()); + } } // Update modification time on the published version and the dataset From 73cd9852e54c3c3a791fced3631d39c3d4aeef7c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 14 Oct 2022 12:35:36 -0400 Subject: [PATCH 277/608] try a wait loop up to 10 seconds --- .../iq/dataverse/api/HarvestingServerIT.java | 45 ++++++++++++------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index a9043d49032..ed9d46f945c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -20,6 +20,7 @@ import static junit.framework.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; /** * extremely minimal API tests for creating OAI sets. @@ -227,7 +228,6 @@ public void testOaiFunctionality() throws InterruptedException { // - however, publish command is executed asynchronously, i.e. it may // still be running after we received the OK from the publish API. // So let's wait for it to finish. - Thread.sleep(200L); UtilIT.sleepForReindex(datasetPersistentId, adminUserAPIKey, 10); String setName = identifier; @@ -247,7 +247,6 @@ public void testOaiFunctionality() throws InterruptedException { Response exportSetResponse = UtilIT.exportOaiSet(setName); assertEquals(200, exportSetResponse.getStatusCode()); //SEK 09/04/2019 resonable wait time for export OAI? 
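One caution on the thumbnail check in PATCH 276 above: publishedFmd.getDataFile().getId()==thumbId compares two boxed Long values by reference, which is only guaranteed for small values in the JVM's integer cache. A tiny demonstration, with Objects.equals as the null-safe alternative; the sample values are invented:

import java.util.Objects;

public class LongEqualitySketch {
    public static void main(String[] args) {
        Long thumbId = 1000L;
        Long fileId = 1000L;
        // Outside the -128..127 cache these are distinct boxes:
        System.out.println(fileId == thumbId);               // false
        System.out.println(Objects.equals(fileId, thumbId)); // true
        // So the patched condition is safer written as:
        // if (Objects.equals(publishedFmd.getDataFile().getId(), thumbId)) { ... }
    }
}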
#6128 - Thread.sleep(5000L); Response getSet = given() .get(apiPath); @@ -255,21 +254,33 @@ public void testOaiFunctionality() throws InterruptedException { logger.info("getSet.getStatusCode(): " + getSet.getStatusCode()); logger.info("getSet printresponse: " + getSet.prettyPrint()); assertEquals(200, getSet.getStatusCode()); - - // Run ListIdentifiers on this newly-created set: - Response listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); - List ret = listIdentifiersResponse.getBody().xmlPath().getList("OAI-PMH.ListIdentifiers.header"); - - assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); - assertNotNull(ret); - logger.info("setName: " + setName); - logger.info("listIdentifiersResponse.prettyPrint:..... "); - listIdentifiersResponse.prettyPrint(); - // There should be 1 and only 1 record in the response: - assertEquals(1, ret.size()); - // And the record should be the dataset we have just created: - assertEquals(datasetPersistentId, listIdentifiersResponse.getBody().xmlPath().getString("OAI-PMH.ListIdentifiers.header.identifier")); - + int i = 0; + for (i = 1; i < 10; i++) { + Thread.sleep(1000L); + + // Run ListIdentifiers on this newly-created set: + Response listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); + List ret = listIdentifiersResponse.getBody().xmlPath().getList("OAI-PMH.ListIdentifiers.header"); + + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + assertNotNull(ret); + logger.info("setName: " + setName); + logger.info("listIdentifiersResponse.prettyPrint:..... "); + listIdentifiersResponse.prettyPrint(); + if (ret.size() != 1) { + i++; + } else { + // There should be 1 and only 1 record in the response: + assertEquals(1, ret.size()); + // And the record should be the dataset we have just created: + assertEquals(datasetPersistentId, listIdentifiersResponse.getBody().xmlPath() + .getString("OAI-PMH.ListIdentifiers.header.identifier")); + break; + } + } + System.out.println("Waited " + i + " seconds for OIA export."); + //Fail if we didn't find the exported record before the timeout + assertTrue(i < 10); Response listRecordsResponse = UtilIT.getOaiListRecords(setName, "oai_dc"); assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); List listRecords = listRecordsResponse.getBody().xmlPath().getList("OAI-PMH.ListRecords.record"); From 68b921e4069c361a668e9722f586c7d1b01514fd Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 14 Oct 2022 13:44:09 -0400 Subject: [PATCH 278/608] change to do while --- .../edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index ed9d46f945c..f88d1d7411f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -255,7 +255,7 @@ public void testOaiFunctionality() throws InterruptedException { logger.info("getSet printresponse: " + getSet.prettyPrint()); assertEquals(200, getSet.getStatusCode()); int i = 0; - for (i = 1; i < 10; i++) { + do { Thread.sleep(1000L); // Run ListIdentifiers on this newly-created set: @@ -277,10 +277,10 @@ public void testOaiFunctionality() throws InterruptedException { .getString("OAI-PMH.ListIdentifiers.header.identifier")); break; } - } + } while (i<15); System.out.println("Waited " + i + " seconds for 
OIA export."); //Fail if we didn't find the exported record before the timeout - assertTrue(i < 10); + assertTrue(i < 15); Response listRecordsResponse = UtilIT.getOaiListRecords(setName, "oai_dc"); assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); List listRecords = listRecordsResponse.getBody().xmlPath().getList("OAI-PMH.ListRecords.record"); From 962dde77db3d24c6cdea719b381774469ab5e2db Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 14 Oct 2022 13:48:30 -0400 Subject: [PATCH 279/608] rename sql script post 5.12 release #8671 --- ...-sorting_licenses.sql => V5.12.0.1__8671-sorting_licenses.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.11.1.2__8671-sorting_licenses.sql => V5.12.0.1__8671-sorting_licenses.sql} (100%) diff --git a/src/main/resources/db/migration/V5.11.1.2__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.12.0.1__8671-sorting_licenses.sql similarity index 100% rename from src/main/resources/db/migration/V5.11.1.2__8671-sorting_licenses.sql rename to src/main/resources/db/migration/V5.12.0.1__8671-sorting_licenses.sql From 9c3b2a75867d55aba1edc6e9ac23c426ba2d64d5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 14 Oct 2022 14:36:02 -0400 Subject: [PATCH 280/608] restore a sleep --- .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index f88d1d7411f..4709d0452ef 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -247,7 +247,7 @@ public void testOaiFunctionality() throws InterruptedException { Response exportSetResponse = UtilIT.exportOaiSet(setName); assertEquals(200, exportSetResponse.getStatusCode()); //SEK 09/04/2019 resonable wait time for export OAI? #6128 - + Thread.sleep(5000L); Response getSet = given() .get(apiPath); From c7c16d4f9138ed557998ccbf34d17e29a300dfd9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 14 Oct 2022 16:50:21 -0400 Subject: [PATCH 281/608] add geo_point and geo_radius #8239 --- doc/sphinx-guides/source/api/search.rst | 2 ++ .../edu/harvard/iq/dataverse/api/Search.java | 21 ++++++++++++- .../search/SearchIncludeFragment.java | 4 +-- .../dataverse/search/SearchServiceBean.java | 25 +++++++++++++--- .../savedsearch/SavedSearchServiceBean.java | 4 ++- .../iq/dataverse/api/DataversesIT.java | 30 ++++++++++++++++++- 6 files changed, 77 insertions(+), 9 deletions(-) diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst index fdebfdb8b10..c4e62e05bb7 100755 --- a/doc/sphinx-guides/source/api/search.rst +++ b/doc/sphinx-guides/source/api/search.rst @@ -35,6 +35,8 @@ show_relevance boolean Whether or not to show details of which fields were ma show_facets boolean Whether or not to show facets that can be operated on by the "fq" parameter. False by default. See :ref:`advanced search example `. fq string A filter query on the search term. Multiple "fq" parameters can be used. See :ref:`advanced search example `. show_entity_ids boolean Whether or not to show the database IDs of the search results (for developer use). +geo_point string Latitude and longitude in the form ``geo_point=42.3,-71.1``. +geo_radius string Radial distance in kilometers such as ``geo_radius=5``. 
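The two geo parameters documented above (the metadata_fields row continues the same table below) feed Solr's spatial filtering, wired up in SearchServiceBean later in this patch via the pt and d parameters. A hypothetical client call; the host, coordinates, and radius are made up:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class GeoSearchSketch {
    public static void main(String[] args) throws Exception {
        // geo_point is latitude,longitude; geo_radius is in kilometers.
        URI uri = URI.create("http://localhost:8080/api/search"
                + "?q=*&geo_point=42.3,-71.1&geo_radius=5");
        HttpRequest request = HttpRequest.newBuilder(uri).GET().build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.body()); // JSON hits within 5 km of the point
    }
}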
metadata_fields string Includes the requested fields for each dataset in the response. Multiple "metadata_fields" parameters can be used to include several fields. The value must be in the form "{metadata_block_name}:{field_name}" to include a specific field from a metadata block (see :ref:`example `) or "{metadata_field_set_name}:\*" to include all the fields for a metadata block (see :ref:`example `). "{field_name}" cannot be a subfield of a compound field. If "{field_name}" is a compound field, all subfields are included. =============== ======= =========== diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Search.java b/src/main/java/edu/harvard/iq/dataverse/api/Search.java index 71cb59ff62a..737fc7d1e20 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Search.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Search.java @@ -72,6 +72,8 @@ public Response search( @QueryParam("show_my_data") boolean showMyData, @QueryParam("query_entities") boolean queryEntities, @QueryParam("metadata_fields") List metadataFields, + @QueryParam("geo_point") String geoPointRequested, + @QueryParam("geo_radius") String geoRadiusRequested, @Context HttpServletResponse response ) { @@ -87,6 +89,8 @@ public Response search( // sanity checking on user-supplied arguments SortBy sortBy; int numResultsPerPage; + String geoPoint; + String geoRadius; List dataverseSubtrees = new ArrayList<>(); try { @@ -119,6 +123,9 @@ public Response search( throw new IOException("Filter is empty, which should never happen, as this allows unfettered searching of our index"); } + geoPoint = getGeoPoint(geoPointRequested); + geoRadius = getGeoRadius(geoRadiusRequested); + } catch (Exception ex) { return error(Response.Status.BAD_REQUEST, ex.getLocalizedMessage()); } @@ -137,7 +144,9 @@ public Response search( paginationStart, dataRelatedToMe, numResultsPerPage, - true //SEK get query entities always for search API additional Dataset Information 6300 12/6/2019 + true, //SEK get query entities always for search API additional Dataset Information 6300 12/6/2019 + geoPoint, + geoRadius ); } catch (SearchException ex) { Throwable cause = ex; @@ -340,4 +349,14 @@ private Dataverse getSubtree(String alias) throws Exception { } } + private String getGeoPoint(String geoPointRequested) { + // TODO add error checking + return geoPointRequested; + } + + private String getGeoRadius(String geoRadiusRequested) { + // TODO add error checking + return geoRadiusRequested; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 9bb83c88add..2b40347828a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -355,7 +355,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused DataverseRequest dataverseRequest = new DataverseRequest(session.getUser(), httpServletRequest); List dataverses = new ArrayList<>(); dataverses.add(dataverse); - solrQueryResponse = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinal, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false); + solrQueryResponse = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinal, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); if (solrQueryResponse.hasError()){ 
logger.info(solrQueryResponse.getError()); setSolrErrorEncountered(true); @@ -363,7 +363,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // This 2nd search() is for populating the "type" ("dataverse", "dataset", "file") facets: -- L.A. // (why exactly do we need it, again?) // To get the counts we display in the types facets particulary for unselected types - SEK 08/25/2021 - solrQueryResponseAllTypes = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalAllTypes, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false); + solrQueryResponseAllTypes = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalAllTypes, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); if (solrQueryResponse.hasError()){ logger.info(solrQueryResponse.getError()); setSolrErrorEncountered(true); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index ca158198204..aee0465ddb1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -100,7 +100,7 @@ public class SearchServiceBean { * @throws SearchException */ public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, String query, List filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage) throws SearchException { - return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true); + return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true, null, null); } /** @@ -121,10 +121,24 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, String query, List filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage, boolean retrieveEntities) throws SearchException { + public SolrQueryResponse search( + DataverseRequest dataverseRequest, + List dataverses, + String query, + List filterQueries, + String sortField, String sortOrder, + int paginationStart, + boolean onlyDatatRelatedToMe, + int numResultsPerPage, + boolean retrieveEntities, + String geoPoint, + String geoRadius + ) throws SearchException { if (paginationStart < 0) { throw new IllegalArgumentException("paginationStart must be 0 or greater"); @@ -204,8 +218,11 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List Date: Tue, 18 Oct 2022 11:03:17 -0400 Subject: [PATCH 282/608] double sleep --- .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 4709d0452ef..71ee313b0c5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -247,7 +247,7 @@ public void testOaiFunctionality() throws InterruptedException { Response exportSetResponse = UtilIT.exportOaiSet(setName); assertEquals(200, exportSetResponse.getStatusCode()); //SEK 09/04/2019 
resonable wait time for export OAI? #6128 - Thread.sleep(5000L); + Thread.sleep(10000L); Response getSet = given() .get(apiPath); From fcf107279dcc0c7b208c68d5b2fd9deb3d0d11cd Mon Sep 17 00:00:00 2001 From: j-n-c Date: Tue, 18 Oct 2022 16:32:44 +0100 Subject: [PATCH 283/608] #9074 - Added support for building sphinx docs using Python 3.10+ --- doc/sphinx-guides/requirements.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/requirements.txt b/doc/sphinx-guides/requirements.txt index 4488c54cd5e..eb9f952d013 100755 --- a/doc/sphinx-guides/requirements.txt +++ b/doc/sphinx-guides/requirements.txt @@ -1,5 +1,7 @@ -# current version as of this writing -Sphinx==3.5.4 +# Necessary workaround for building Sphinx guides with Python 3.10+ versions +Sphinx==3.5.4 ; python_version < '3.10' +Sphinx==5.3.0 ; python_version >= '3.10' + # Necessary workaround for ReadTheDocs for Sphinx 3.x - unnecessary as of Sphinx 4.5+ Jinja2>=3.0.2,<3.1 From 73c3362e363cc5d706d25bbb2f249e1c912e0bd6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 18 Oct 2022 19:36:26 -0400 Subject: [PATCH 284/608] add waitForReexport test --- .../iq/dataverse/api/HarvestingServerIT.java | 4 +-- .../edu/harvard/iq/dataverse/api/UtilIT.java | 33 +++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 71ee313b0c5..7056cf31f59 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -228,7 +228,7 @@ public void testOaiFunctionality() throws InterruptedException { // - however, publish command is executed asynchronously, i.e. it may // still be running after we received the OK from the publish API. // So let's wait for it to finish. - UtilIT.sleepForReindex(datasetPersistentId, adminUserAPIKey, 10); + UtilIT.sleepForReexport(datasetPersistentId, adminUserAPIKey, 10); String setName = identifier; String setQuery = "dsPersistentId:" + identifier; String apiPath = String.format("/api/harvest/server/oaisets/%s", setName); @@ -247,7 +247,7 @@ public void testOaiFunctionality() throws InterruptedException { Response exportSetResponse = UtilIT.exportOaiSet(setName); assertEquals(200, exportSetResponse.getStatusCode()); //SEK 09/04/2019 resonable wait time for export OAI?
#6128 - Thread.sleep(10000L); + Thread.sleep(5000L); Response getSet = given() .get(apiPath); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 4ea2cc5f2d2..425156c1652 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -17,6 +17,7 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; +import java.time.LocalDateTime; import java.util.logging.Level; import edu.harvard.iq.dataverse.api.datadeposit.SwordConfigurationImpl; import com.jayway.restassured.path.xml.XmlPath; @@ -2454,6 +2455,38 @@ static boolean sleepForReindex(String idOrPersistentId, String apiToken, int dur return i <= repeats; } + static boolean sleepForReexport(String idOrPersistentId, String apiToken, int durationInSeconds) { + int i = 0; + Response timestampResponse; + int sleepStep = 500; + int repeats = durationInSeconds * (1000 / sleepStep); + boolean staleExport=true; + do { + timestampResponse = UtilIT.getDatasetTimestamps(idOrPersistentId, apiToken); + System.out.println(timestampResponse.body().asString()); + String updateTimeString = timestampResponse.body().jsonPath().getString("data.lastUpdateTime"); + String exportTimeString = timestampResponse.body().jsonPath().getString("data.lastMetadataExportTime"); + if (updateTimeString != null && exportTimeString != null) { + LocalDateTime updateTime = LocalDateTime.parse(updateTimeString); + LocalDateTime exportTime = LocalDateTime.parse(exportTimeString); + if (exportTime.isAfter(updateTime)) { + staleExport = false; + } + } + try { + Thread.sleep(sleepStep); + i++; + } catch (InterruptedException ex) { + Logger.getLogger(UtilIT.class.getName()).log(Level.SEVERE, null, ex); + i = repeats + 1; + } + } while ((i <= repeats) && staleExport); + System.out.println("Waited " + (i * (sleepStep / 1000)) + " seconds for export"); + return i <= repeats; + + } + + //Helper function that returns true if a given search returns a non-zero response within a fixed time limit From d08841df22d0c593500cae802caff80df40da423 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 19 Oct 2022 06:51:33 -0400 Subject: [PATCH 285/608] cleanup --- .../iq/dataverse/api/HarvestingServerIT.java | 20 +++++++++++-------- .../edu/harvard/iq/dataverse/api/UtilIT.java | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 7056cf31f59..17ad077c39e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api; +import java.util.logging.Level; import java.util.logging.Logger; import com.jayway.restassured.RestAssured; import static com.jayway.restassured.RestAssured.given; @@ -227,7 +228,9 @@ public void testOaiFunctionality() throws InterruptedException { // created and published: // - however, publish command is executed asynchronously, i.e. it may // still be running after we received the OK from the publish API. - // So let's wait for it to finish. + // The oaiExport step also requires the metadata exports to be done and this + // takes longer than just publish/reindex. + // So wait for all of this to finish. 
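The staleness test inside the new sleepForReexport (PATCH 284 above) parses data.lastUpdateTime and data.lastMetadataExportTime from the timestamps API and keeps polling until the export is newer than the update. The comparison in isolation, assuming both values arrive in the ISO-8601 shape LocalDateTime.parse expects; the sample strings are invented:

import java.time.LocalDateTime;

public class ExportStalenessSketch {
    public static void main(String[] args) {
        LocalDateTime updateTime = LocalDateTime.parse("2022-10-19T10:15:30.123");
        LocalDateTime exportTime = LocalDateTime.parse("2022-10-19T10:15:31.456");
        // Same test as sleepForReexport: fresh only if exported after the update.
        boolean staleExport = !exportTime.isAfter(updateTime);
        System.out.println(staleExport); // false -> stop polling
    }
}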
UtilIT.sleepForReexport(datasetPersistentId, adminUserAPIKey, 10); String setName = identifier; @@ -246,15 +249,14 @@ public void testOaiFunctionality() throws InterruptedException { // (this is asynchronous - so we should probably wait a little) Response exportSetResponse = UtilIT.exportOaiSet(setName); assertEquals(200, exportSetResponse.getStatusCode()); - //SEK 09/04/2019 resonable wait time for export OAI? #6128 - Thread.sleep(5000L); Response getSet = given() .get(apiPath); logger.info("getSet.getStatusCode(): " + getSet.getStatusCode()); - logger.info("getSet printresponse: " + getSet.prettyPrint()); + logger.fine("getSet printresponse: " + getSet.prettyPrint()); assertEquals(200, getSet.getStatusCode()); int i = 0; + int maxWait=10; do { Thread.sleep(1000L); @@ -265,8 +267,10 @@ public void testOaiFunctionality() throws InterruptedException { assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); assertNotNull(ret); logger.info("setName: " + setName); - logger.info("listIdentifiersResponse.prettyPrint:..... "); - listIdentifiersResponse.prettyPrint(); + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint:..... "); + listIdentifiersResponse.prettyPrint(); + } if (ret.size() != 1) { i++; } else { @@ -277,10 +281,10 @@ public void testOaiFunctionality() throws InterruptedException { .getString("OAI-PMH.ListIdentifiers.header.identifier")); break; } - } while (i<15); + } while (i Date: Wed, 19 Oct 2022 08:56:55 -0400 Subject: [PATCH 286/608] report fractional second waits --- src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index a919b222f58..1d0398900fb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2451,7 +2451,7 @@ static boolean sleepForReindex(String idOrPersistentId, String apiToken, int dur i = repeats + 1; } } while ((i <= repeats) && stale); - System.out.println("Waited " + (i * (sleepStep / 1000)) + " seconds"); + System.out.println("Waited " + (i * (sleepStep / 1000.0)) + " seconds"); return i <= repeats; } @@ -2481,7 +2481,7 @@ static boolean sleepForReexport(String idOrPersistentId, String apiToken, int du i = repeats + 1; } } while ((i <= repeats) && staleExport); - System.out.println("Waited " + (i * (sleepStep / 1000)) + " seconds for export"); + System.out.println("Waited " + (i * (sleepStep / 1000.0)) + " seconds for export"); return i <= repeats; } From 4927ac3eaea41a1d00bfe61833f58d48892c0cc3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 19 Oct 2022 09:53:07 -0400 Subject: [PATCH 287/608] only wait if needed --- .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 17ad077c39e..fdd034ab12e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -258,7 +258,7 @@ public void testOaiFunctionality() throws InterruptedException { int i = 0; int maxWait=10; do { - Thread.sleep(1000L); + // Run ListIdentifiers on this newly-created set: Response listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); @@ -281,6 
+281,7 @@ public void testOaiFunctionality() throws InterruptedException { .getString("OAI-PMH.ListIdentifiers.header.identifier")); break; } + Thread.sleep(1000L); } while (i Date: Tue, 18 Oct 2022 13:04:57 -0400 Subject: [PATCH 288/608] truncate descriptions in schema.org header and export Nominally schema.org requires description to be text (not an array as we had) and Google requires a description <5000 chars. --- .../harvard/iq/dataverse/DatasetVersion.java | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..4163b4058be 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -842,12 +842,21 @@ public String getDescriptionPlainText() { return MarkupChecker.stripAllTags(getDescription()); } - public List getDescriptionsPlainText() { - List plainTextDescriptions = new ArrayList<>(); + /* This method is (only) used in creating schema.org json-jd where Google requires a text description <5000 chars. + * + * @returns - a single string composed of all descriptions (joined with \n if more than one) truncated with a trailing '...' if >=5000 chars + */ + public String getDescriptionsPlainTextTruncated() { + List plainTextDescriptions = new ArrayList(); + for (String htmlDescription : getDescriptions()) { plainTextDescriptions.add(MarkupChecker.stripAllTags(htmlDescription)); } - return plainTextDescriptions; + String description = String.join("\\n", plainTextDescriptions); + if(description.length()>=5000) { + description = description.substring(0, (description.substring(0,4997).lastIndexOf(" "))) + "..."; + } + return description; } /** @@ -1859,16 +1868,8 @@ public String getJsonLd() { job.add("dateModified", this.getPublicationDateAsString()); job.add("version", this.getVersionNumber().toString()); - JsonArrayBuilder descriptionsArray = Json.createArrayBuilder(); - List descriptions = this.getDescriptionsPlainText(); - for (String description : descriptions) { - descriptionsArray.add(description); - } - /** - * In Dataverse 4.8.4 "description" was a single string but now it's an - * array. 
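The 5000-character ceiling in getDescriptionsPlainTextTruncated above reflects the Google requirement the commit message cites; the code cuts at the last space before the limit and appends an ellipsis. The same arithmetic with the 5000/4997 constants shrunk so the effect is visible; like the patched code, it assumes a space occurs before the cutoff:

public class TruncateSketch {
    static String truncate(String description, int max) {
        if (description.length() >= max) {
            int cut = description.substring(0, max - 3).lastIndexOf(" ");
            description = description.substring(0, cut) + "...";
        }
        return description;
    }

    public static void main(String[] args) {
        String text = "Darwin's finches are a group of about fifteen species of passerine birds.";
        System.out.println(truncate(text, 40));
        // -> Darwin's finches are a group of...
    }
}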
- */ - job.add("description", descriptionsArray); + String description = this.getDescriptionsPlainTextTruncated(); + job.add("description", description); /** * "keywords" - contains subject(s), datasetkeyword(s) and topicclassification(s) From 542a5f13d6904d0fddd9e9e93ab5a3db78d91e7f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 18:38:42 -0400 Subject: [PATCH 289/608] fix description text - string not json array --- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..f130e742488 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -147,7 +147,7 @@ public void testExportDataset() throws Exception { assertEquals("1955-11-05", json2.getString("datePublished")); assertEquals("1955-11-05", json2.getString("dateModified")); assertEquals("1", json2.getString("version")); - assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", json2.getJsonArray("description").getString(0)); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", json2.getString("description")); assertEquals("Bird is the word.", json2.getJsonArray("description").getString(1)); assertEquals(2, json2.getJsonArray("description").size()); assertEquals("Medicine, Health and Life Sciences", json2.getJsonArray("keywords").getString(0)); From 578ff16331baa86ee8c56bd550c3eb80e4c39905 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 18:44:38 -0400 Subject: [PATCH 290/608] more fixes --- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index f130e742488..76cee7d65e8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -147,9 +147,7 @@ public void testExportDataset() throws Exception { assertEquals("1955-11-05", json2.getString("datePublished")); assertEquals("1955-11-05", json2.getString("dateModified")); assertEquals("1", json2.getString("version")); - assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", json2.getString("description")); - assertEquals("Bird is the word.", json2.getJsonArray("description").getString(1)); - assertEquals(2, json2.getJsonArray("description").size()); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.\\nBird is the word.", json2.getString("description")); assertEquals("Medicine, Health and Life Sciences", json2.getJsonArray("keywords").getString(0)); assertEquals("tcTerm1", json2.getJsonArray("keywords").getString(1)); assertEquals("KeywordTerm1", json2.getJsonArray("keywords").getString(2)); From 67ecd7a2dd2a0a2e71497e4015ded0f03ad8c69e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 19:11:29 -0400 Subject: [PATCH 291/608] fix linefeed in descriptions --- 
src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 4163b4058be..78055cee22a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -852,7 +852,7 @@ public String getDescriptionsPlainTextTruncated() { for (String htmlDescription : getDescriptions()) { plainTextDescriptions.add(MarkupChecker.stripAllTags(htmlDescription)); } - String description = String.join("\\n", plainTextDescriptions); + String description = String.join("\n", plainTextDescriptions); if(description.length()>=5000) { description = description.substring(0, (description.substring(0,4997).lastIndexOf(" "))) + "..."; } diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index 76cee7d65e8..06f07404d6a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -147,7 +147,7 @@ public void testExportDataset() throws Exception { assertEquals("1955-11-05", json2.getString("datePublished")); assertEquals("1955-11-05", json2.getString("dateModified")); assertEquals("1", json2.getString("version")); - assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.\\nBird is the word.", json2.getString("description")); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.\nBird is the word.", json2.getString("description")); assertEquals("Medicine, Health and Life Sciences", json2.getJsonArray("keywords").getString(0)); assertEquals("tcTerm1", json2.getJsonArray("keywords").getString(1)); assertEquals("KeywordTerm1", json2.getJsonArray("keywords").getString(2)); From 99e276c51e84680bcc82e473a7f1fa61e9a4b1e8 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 13:54:07 -0400 Subject: [PATCH 292/608] specify URL for citations/related publications --- .../harvard/iq/dataverse/DatasetVersion.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..41b3cba8bd8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1892,11 +1892,16 @@ public String getJsonLd() { job.add("keywords", keywords); /** - * citation: (multiple) related publication citation and URLs, if - * present. + * citation: (multiple) related publication citation and URLs, if present. * - * In Dataverse 4.8.4 "citation" was an array of strings but now it's an - * array of objects. + * Schema.org allows text or a CreativeWork object. Google recommends text with + * either the full citation or the PID URL. This code adds an object if we have + * the citation text for the work and/or an entry in the URL field (i.e. + * https://doi.org/...) 
The URL is reported as the 'url' field while the + * citation text (which would normally include the name) is reported as 'name' + * since there doesn't appear to be a better field ('text', which was used + * previously, is the actual text of the creative work). + * */ List relatedPublications = getRelatedPublications(); if (!relatedPublications.isEmpty()) { @@ -1911,11 +1916,11 @@ public String getJsonLd() { JsonObjectBuilder citationEntry = Json.createObjectBuilder(); citationEntry.add("@type", "CreativeWork"); if (pubCitation != null) { - citationEntry.add("text", pubCitation); + citationEntry.add("name", pubCitation); } if (pubUrl != null) { citationEntry.add("@id", pubUrl); - citationEntry.add("identifier", pubUrl); + citationEntry.add("url", pubUrl); } if (addToArray) { jsonArrayBuilder.add(citationEntry); From 59f9ed7a2db5d4709dd2fc8f74ea13e577d351ce Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 11:46:58 -0400 Subject: [PATCH 293/608] add description truncation test --- .../export/SchemaDotOrgExporterTest.java | 159 ++++---- .../json/dataset-long-description.json | 362 ++++++++++++++++++ 2 files changed, 453 insertions(+), 68 deletions(-) create mode 100644 src/test/resources/json/dataset-long-description.json diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index 06f07404d6a..98da4008de9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -10,16 +10,18 @@ import static edu.harvard.iq.dataverse.util.SystemConfig.FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.json.JsonParseException; import edu.harvard.iq.dataverse.util.json.JsonParser; import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.IOException; import java.io.PrintWriter; -import java.io.StringReader; import java.net.URI; import java.nio.file.Files; import java.nio.file.Paths; import java.sql.Timestamp; +import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; @@ -28,15 +30,14 @@ import java.util.List; import java.util.Set; import java.util.logging.Logger; -import javax.json.Json; import javax.json.JsonObject; -import javax.json.JsonReader; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; import org.mockito.Mockito; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; /** * For docs see {@link SchemaDotOrgExporter}. @@ -62,75 +63,19 @@ public static void tearDownClass() { /** * Test of exportDataset method, of class SchemaDotOrgExporter. 
+ * @throws IOException + * @throws JsonParseException + * @throws ParseException + * */ @Test - public void testExportDataset() throws Exception { + public void testExportDataset() throws JsonParseException, ParseException, IOException { File datasetVersionJson = new File("src/test/resources/json/dataset-finch2.json"); String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); - License license = new License("CC0 1.0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0/"), URI.create("/resources/images/cc0.png"), true); - license.setDefault(true); - - JsonReader jsonReader1 = Json.createReader(new StringReader(datasetVersionAsJson)); - JsonObject json1 = jsonReader1.readObject(); - JsonParser jsonParser = new JsonParser(datasetFieldTypeSvc, null, settingsService, licenseService); - DatasetVersion version = jsonParser.parseDatasetVersion(json1.getJsonObject("datasetVersion")); - version.setVersionState(DatasetVersion.VersionState.RELEASED); - SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd"); - Date publicationDate = dateFmt.parse("19551105"); - version.setReleaseTime(publicationDate); - version.setVersionNumber(1l); - TermsOfUseAndAccess terms = new TermsOfUseAndAccess(); - terms.setLicense(license); - version.setTermsOfUseAndAccess(terms); - - Dataset dataset = new Dataset(); - dataset.setProtocol("doi"); - dataset.setAuthority("10.5072/FK2"); - dataset.setIdentifier("IMK5A4"); - dataset.setPublicationDate(new Timestamp(publicationDate.getTime())); - version.setDataset(dataset); - Dataverse dataverse = new Dataverse(); - dataverse.setName("LibraScholar"); - dataset.setOwner(dataverse); - System.setProperty(SITE_URL, "https://librascholar.org"); - boolean hideFileUrls = false; - if (hideFileUrls) { - System.setProperty(FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS, "true"); - } - FileMetadata fmd = new FileMetadata(); - DataFile dataFile = new DataFile(); - dataFile.setId(42l); - dataFile.setFilesize(1234); - dataFile.setContentType("text/plain"); - dataFile.setProtocol("doi"); - dataFile.setAuthority("10.5072/FK2"); - dataFile.setIdentifier("7V5MPI"); - fmd.setDatasetVersion(version); - fmd.setDataFile(dataFile); - fmd.setLabel("README.md"); - fmd.setDescription("README file."); - List fileMetadatas = new ArrayList<>(); - fileMetadatas.add(fmd); - dataFile.setFileMetadatas(fileMetadatas);; - dataFile.setOwner(dataset); - version.setFileMetadatas(fileMetadatas); - - ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); - if(json1 == null) logger.fine("Json null"); - if(version == null) logger.fine("ver null"); - if(byteArrayOutputStream == null) logger.fine("bytarr null"); - if(schemaDotOrgExporter == null) logger.fine("sdoe" + " null"); - try { - schemaDotOrgExporter.exportDataset(version, json1, byteArrayOutputStream); - } catch (Exception e) { - e.printStackTrace(); - } - String jsonLd = byteArrayOutputStream.toString(); - String prettyJson = JsonUtil.prettyPrint(jsonLd); - logger.fine("schema.org JSON-LD: " + prettyJson); - JsonReader jsonReader2 = Json.createReader(new StringReader(jsonLd)); - JsonObject json2 = jsonReader2.readObject(); + JsonObject json = JsonUtil.getJsonObject(datasetVersionAsJson); + JsonObject json2 = createExportFromJson(json); + assertEquals("http://schema.org", json2.getString("@context")); assertEquals("Dataset", json2.getString("@type")); 
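For a concrete picture of the citation mapping changed in PATCH 292 (name and url instead of the earlier text/identifier keys), this is roughly how one element of the JSON-LD citation array is shaped, rebuilt here with the same javax.json API the codebase uses; the citation text and DOI are invented:

import javax.json.Json;
import javax.json.JsonObject;

public class CitationEntrySketch {
    public static void main(String[] args) {
        JsonObject citationEntry = Json.createObjectBuilder()
                .add("@type", "CreativeWork")
                .add("name", "Finch, F. (2022). Beak Morphology. Journal of Birds.")
                .add("@id", "https://doi.org/10.5072/FK2/EXAMPLE")
                .add("url", "https://doi.org/10.5072/FK2/EXAMPLE")
                .build();
        System.out.println(citationEntry);
    }
}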
assertEquals("https://doi.org/10.5072/FK2/IMK5A4", json2.getString("@id")); @@ -187,8 +132,86 @@ public void testExportDataset() throws Exception { assertEquals("https://librascholar.org/api/access/datafile/42", json2.getJsonArray("distribution").getJsonObject(0).getString("contentUrl")); assertEquals(1, json2.getJsonArray("distribution").size()); try (PrintWriter printWriter = new PrintWriter("/tmp/dvjsonld.json")) { - printWriter.println(prettyJson); + printWriter.println(JsonUtil.prettyPrint(json2)); + } + + } + + /** + * Test description truncation in exportDataset method, of class SchemaDotOrgExporter. + * @throws IOException + * @throws JsonParseException + * @throws ParseException + * + */ + @Test + public void testExportDescriptionTruncation() throws JsonParseException, ParseException, IOException { + File datasetVersionJson = new File("src/test/resources/json/dataset-long-description.json"); + String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); + + JsonObject json = JsonUtil.getJsonObject(datasetVersionAsJson); + JsonObject json2 = createExportFromJson(json); + + assertTrue(json2.getString("description").endsWith("at...")); + } + + private JsonObject createExportFromJson(JsonObject json) throws JsonParseException, ParseException { + License license = new License("CC0 1.0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0/"), URI.create("/resources/images/cc0.png"), true); + license.setDefault(true); + JsonParser jsonParser = new JsonParser(datasetFieldTypeSvc, null, settingsService, licenseService); + DatasetVersion version = jsonParser.parseDatasetVersion(json.getJsonObject("datasetVersion")); + version.setVersionState(DatasetVersion.VersionState.RELEASED); + SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd"); + Date publicationDate = dateFmt.parse("19551105"); + version.setReleaseTime(publicationDate); + version.setVersionNumber(1l); + TermsOfUseAndAccess terms = new TermsOfUseAndAccess(); + terms.setLicense(license); + version.setTermsOfUseAndAccess(terms); + + Dataset dataset = new Dataset(); + dataset.setProtocol("doi"); + dataset.setAuthority("10.5072/FK2"); + dataset.setIdentifier("IMK5A4"); + dataset.setPublicationDate(new Timestamp(publicationDate.getTime())); + version.setDataset(dataset); + Dataverse dataverse = new Dataverse(); + dataverse.setName("LibraScholar"); + dataset.setOwner(dataverse); + System.setProperty(SITE_URL, "https://librascholar.org"); + boolean hideFileUrls = false; + if (hideFileUrls) { + System.setProperty(FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS, "true"); + } + + FileMetadata fmd = new FileMetadata(); + DataFile dataFile = new DataFile(); + dataFile.setId(42l); + dataFile.setFilesize(1234); + dataFile.setContentType("text/plain"); + dataFile.setProtocol("doi"); + dataFile.setAuthority("10.5072/FK2"); + dataFile.setIdentifier("7V5MPI"); + fmd.setDatasetVersion(version); + fmd.setDataFile(dataFile); + fmd.setLabel("README.md"); + fmd.setDescription("README file."); + List fileMetadatas = new ArrayList<>(); + fileMetadatas.add(fmd); + dataFile.setFileMetadatas(fileMetadatas); + ; + dataFile.setOwner(dataset); + version.setFileMetadatas(fileMetadatas); + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + if(schemaDotOrgExporter == null) logger.fine("sdoe" + " null"); + try { + schemaDotOrgExporter.exportDataset(version, json, 
byteArrayOutputStream); + } catch (Exception e) { + e.printStackTrace(); } + String jsonLdStr = byteArrayOutputStream.toString(); + return JsonUtil.getJsonObject(jsonLdStr); } /** diff --git a/src/test/resources/json/dataset-long-description.json b/src/test/resources/json/dataset-long-description.json new file mode 100644 index 00000000000..a6e5c291322 --- /dev/null +++ b/src/test/resources/json/dataset-long-description.json @@ -0,0 +1,362 @@ +{ + "datasetVersion": { + "metadataBlocks": { + "citation": { + "fields": [ + { + "value": "Darwin's Finches", + "typeClass": "primitive", + "multiple": false, + "typeName": "title" + }, + { + "value": [ + { + "authorName": { + "value": "Finch, Fiona", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + }, + "authorIdentifierScheme": { + "typeName": "authorIdentifierScheme", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "ORCID" + }, + "authorIdentifier": { + "typeName": "authorIdentifier", + "multiple": false, + "typeClass": "primitive", + "value": "0000-0002-1825-0097" + }, + "authorAffiliation": { + "value": "Birds Inc.", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorAffiliation" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + }, + { + "value": [ + { + "datasetContactEmail": { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactEmail", + "value": "finch@mailinator.com" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "datasetContact" + }, + { + "value": [ + { + "dsDescriptionValue": { + "value": "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc, quis gravida magna mi a libero. Fusce vulputate eleifend sapien. Vestibulum purus quam, scelerisque ut, mollis sed, nonummy id, metus. Nullam accumsan lorem in dui. Cras ultricies mi eu turpis hendrerit fringilla. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; In ac dui quis mi consectetuer lacinia. Nam pretium turpis et arcu. Duis arcu tortor, suscipit eget, imperdiet nec, imperdiet iaculis, ipsum. Sed aliquam ultrices mauris. Integer ante arcu, accumsan a, consectetuer eget, posuere ut, mauris. 
Praesent adipiscing. Phasellus ullamcorper ipsum rutrum nunc. Nunc nonummy metus. Vestibulum volutpat pretium libero. Cras id dui. Aenean ut eros et nisl sagittis vestibulum. Nullam nulla eros, ultricies sit amet, nonummy id, imperdiet feugiat, pede. Sed lectus. Donec mollis hendrerit risus. Phasellus nec sem in justo pellentesque facilisis. Etiam imperdiet imperdiet orci. Nunc nec neque. Phasellus leo dolor, tempus non, auctor et, hendrerit quis, nisi. Curabitur ligula sapien, tincidunt non, euismod vitae, posuere imperdiet, leo. Maecenas malesuada. Praesent congue erat at massa. Sed cursus turpis vitae tortor. Donec posuere vulputate arcu. Phasellus accumsan cursus velit. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Sed aliquam, nisi quis porttitor congue, elit erat euismod orci, ac placerat dolor lectus quis orci. Phasellus consectetuer vestibulum elit. Aenean tellus metus, bibendum sed, posuere ac, mattis non, nunc. Vestibulum fringilla pede sit amet augue. In turpis. Pellentesque posuere. Praesent turpis. Aenean posuere, tortor sed cursus feugiat, nunc augue blandit nunc, eu sollicitudin urna dolor sagittis lacus. Donec elit libero, sodales nec, volutpat a, suscipit non, turpis. Nullam sagittis. Suspendisse pulvinar, augue ac venenatis condimentum, sem libero volutpat nibh, nec pellentesque velit pede quis nunc. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Fusce id purus. Ut varius tincidunt libero. Phasellus dolor. Maecenas vestibulum mollis diam. Pellentesque ut neque. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. In dui magna, posuere eget, vestibulum et, tempor auctor, justo. In ac felis quis tortor malesuada pretium. Pellentesque auctor neque nec urna. Proin sapien ipsum, porta a, auctor quis, euismod ut, mi. Aenean viverra rhoncus pede. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Ut non enim eleifend felis pretium feugiat. Vivamus quis mi. Phasellus a est. Phasellus magna. In hac habitasse platea dictumst. Curabitur at lacus ac velit ornare lobortis. Curabitur a felis in nunc fringilla tristique. Morbi mattis ullamcorper velit. Phasellus gravida semper nisi. Nullam vel sem. Pellentesque libero tortor, tincidunt et, tincidunt eget, semper nec, quam. Sed hendrerit. Morbi ac felis. Nunc egestas, augue at pellentesque laoreet, felis eros vehicula leo, at malesuada velit leo quis pede. Donec interdum, metus et hendrerit aliquet, dolor diam sagittis ligula, eget egestas libero turpis vel mi. Nunc nulla. Fusce risus nisl, viverra et, tempor et, pretium in, sapien. Donec venenatis vulputate lorem. Morbi nec metus. Phasellus blandit leo ut odio. Maecenas ullamcorper, dui et placerat feugiat, eros pede varius nisi, condimentum viverra felis nunc et lorem. Sed magna purus, fermentum eu, tincidunt eu, varius ut, felis. In auctor lobortis lacus. Quisque libero metus, condimentum nec, tempor a, commodo mollis, magna. 
Vestibulum ullamcorper mauris at ligul beyond 5000 chars", + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "dsDescription" + }, + { + "value": [ + "Medicine, Health and Life Sciences" + ], + "typeClass": "controlledVocabulary", + "multiple": true, + "typeName": "subject" + }, + { + "typeName": "keyword", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordTerm1" + }, + "keywordVocabulary": { + "typeName": "keywordVocabulary", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordVocabulary1" + }, + "keywordVocabularyURI": { + "typeName": "keywordVocabularyURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://KeywordVocabularyURL1.org" + } + }, + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordTerm2" + }, + "keywordVocabulary": { + "typeName": "keywordVocabulary", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordVocabulary2" + }, + "keywordVocabularyURI": { + "typeName": "keywordVocabularyURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://KeywordVocabularyURL2.org" + } + }, + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "keywords, with, commas" + } + } + ] + }, + { + "typeName": "topicClassification", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "topicClassValue": { + "typeName": "topicClassValue", + "multiple": false, + "typeClass": "primitive", + "value": "tcTerm1" + }, + "topicClassVocab": { + "typeName": "topicClassVocab", + "multiple": false, + "typeClass": "primitive", + "value": "tcVocab1" + }, + "topicClassVocabURI": { + "typeName": "topicClassVocabURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://example.com/tcTerm1" + } + } + ] + }, + { + "typeName": "contributor", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Data Collector" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "Holmes, Sherlock" + } + }, + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Funder" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "National Science Foundation" + } + }, + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Data Collector" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "Watson, John" + } + } + ] + }, + { + "typeName": "grantNumber", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "grantNumberAgency": { + "typeName": "grantNumberAgency", + "multiple": false, + "typeClass": "primitive", + "value": "National Institutes of Health" + }, + "grantNumberValue": { + "typeName": "grantNumberValue", + "multiple": false, + "typeClass": "primitive", + "value": "1245" + } + } + ] + }, + { + "typeName": "publication", + "multiple": true, + "typeClass": "compound", + "value": [ + { + 
"publicationCitation": { + "typeName": "publicationCitation", + "multiple": false, + "typeClass": "primitive", + "value": "Finch, Fiona 2018. \"The Finches.\" American Ornithological Journal 60 (4): 990-1005." + }, + "publicationIDType": { + "typeName": "publicationIDType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "doi" + }, + "publicationIDNumber": { + "typeName": "publicationIDNumber", + "multiple": false, + "typeClass": "primitive", + "value": "10.5072/FK2/RV16HK" + }, + "publicationURL": { + "typeName": "publicationURL", + "multiple": false, + "typeClass": "primitive", + "value": "https://doi.org/10.5072/FK2/RV16HK" + } + } + ] + }, + { + "typeName": "timePeriodCovered", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "timePeriodCoveredStart": { + "typeName": "timePeriodCoveredStart", + "multiple": false, + "typeClass": "primitive", + "value": "2002" + }, + "timePeriodCoveredEnd": { + "typeName": "timePeriodCoveredEnd", + "multiple": false, + "typeClass": "primitive", + "value": "2005" + } + }, + { + "timePeriodCoveredStart": { + "typeName": "timePeriodCoveredStart", + "multiple": false, + "typeClass": "primitive", + "value": "2001-10-01" + }, + "timePeriodCoveredEnd": { + "typeName": "timePeriodCoveredEnd", + "multiple": false, + "typeClass": "primitive", + "value": "2015-11-15" + } + } + ] + } + ], + "displayName": "Citation Metadata", + "name": "citation" + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "name": "geospatial", + "fields": [ + { + "typeName": "geographicCoverage", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "city": { + "typeName": "city", + "multiple": false, + "typeClass": "primitive", + "value": "Columbus" + }, + "state": { + "typeName": "state", + "multiple": false, + "typeClass": "primitive", + "value": "Ohio" + }, + "country": { + "typeName": "country", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "United States" + }, + "otherGeographicCoverage": { + "typeName": "otherGeographicCoverage", + "multiple": false, + "typeClass": "primitive", + "value": "North America" + } + }, + { + "country": { + "typeName": "country", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "United States" + }, + "state": { + "typeName": "state", + "multiple": false, + "typeClass": "primitive", + "value": "Wisconsin" + } + } + ] + } + ] + } + } + } +} From 8fbce47b7345cbeb96e46c4353376ca0c518dd01 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 11:53:23 -0400 Subject: [PATCH 294/608] release notes --- doc/release-notes/7349-1-schema.org-updates.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/7349-1-schema.org-updates.md diff --git a/doc/release-notes/7349-1-schema.org-updates.md b/doc/release-notes/7349-1-schema.org-updates.md new file mode 100644 index 00000000000..2934a596001 --- /dev/null +++ b/doc/release-notes/7349-1-schema.org-updates.md @@ -0,0 +1,3 @@ +The Schema.org metadata export and the schema.org metadata embedded in dataset pages has been updated to improve compliance with Schema.org's schema and Google's recommendations. + +Backward compatibility - descriptions are now joined and truncated to less than 5K characters. 
\ No newline at end of file From e5993804d258043139adae35afb05a46185aea81 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 18:58:23 -0400 Subject: [PATCH 295/608] restore citation/identifier, add test for url --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 1 + .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 1 + 2 files changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 41b3cba8bd8..e09c458915c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1920,6 +1920,7 @@ public String getJsonLd() { } if (pubUrl != null) { citationEntry.add("@id", pubUrl); + citationEntry.add("identifier", pubUrl); citationEntry.add("url", pubUrl); } if (addToArray) { diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..8083d4a8851 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -160,6 +160,7 @@ public void testExportDataset() throws Exception { assertEquals("Finch, Fiona 2018. \"The Finches.\" American Ornithological Journal 60 (4): 990-1005.", json2.getJsonArray("citation").getJsonObject(0).getString("text")); assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("@id")); assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("identifier")); + assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("url")); assertEquals("2002/2005", json2.getJsonArray("temporalCoverage").getString(0)); assertEquals("2001-10-01/2015-11-15", json2.getJsonArray("temporalCoverage").getString(1)); assertEquals(null, json2.getString("schemaVersion", null)); From 286b0825c601addbca463dd4cd02bc571e6f67cf Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 12:21:47 -0400 Subject: [PATCH 296/608] fix text->name test --- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index 8083d4a8851..c0dfcd9e34d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -157,7 +157,7 @@ public void testExportDataset() throws Exception { // This dataset, for example, has multiple keywords separated by commas: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/24034&version=2.0 assertEquals("keywords, with, commas", json2.getJsonArray("keywords").getString(4)); assertEquals("CreativeWork", json2.getJsonArray("citation").getJsonObject(0).getString("@type")); - assertEquals("Finch, Fiona 2018. \"The Finches.\" American Ornithological Journal 60 (4): 990-1005.", json2.getJsonArray("citation").getJsonObject(0).getString("text")); + assertEquals("Finch, Fiona 2018. 
\"The Finches.\" American Ornithological Journal 60 (4): 990-1005.", json2.getJsonArray("citation").getJsonObject(0).getString("name")); assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("@id")); assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("identifier")); assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("url")); From c28b4b3c277fceb13cd05f0bbe2a80f5fff11d7b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 12:40:40 -0400 Subject: [PATCH 297/608] release notes --- doc/release-notes/7349-2-schema.org-updates.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/7349-2-schema.org-updates.md diff --git a/doc/release-notes/7349-2-schema.org-updates.md b/doc/release-notes/7349-2-schema.org-updates.md new file mode 100644 index 00000000000..41f2dfb766a --- /dev/null +++ b/doc/release-notes/7349-2-schema.org-updates.md @@ -0,0 +1,3 @@ +The Schema.org metadata export and the schema.org metadata embedded in dataset pages has been updated to improve compliance with Schema.org's schema and Google's recommendations. + +Backward compatibility - the "citation"/"text" key has been replaced by a "citation"/"name" key. \ No newline at end of file From 80059c9cb65cc042550f2f3fd470198ed337636e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 13:16:43 -0400 Subject: [PATCH 298/608] redo of schema.org file changes --- .../java/edu/harvard/iq/dataverse/DatasetVersion.java | 8 +++----- .../iq/dataverse/export/SchemaDotOrgExporterTest.java | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..314e06149ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -2012,7 +2012,7 @@ public String getJsonLd() { } fileObject.add("@type", "DataDownload"); fileObject.add("name", fileMetadata.getLabel()); - fileObject.add("fileFormat", fileMetadata.getDataFile().getContentType()); + fileObject.add("encodingFormat", fileMetadata.getDataFile().getContentType()); fileObject.add("contentSize", fileMetadata.getDataFile().getFilesize()); fileObject.add("description", fileMetadata.getDescription()); fileObject.add("@id", filePidUrlAsString); @@ -2021,10 +2021,8 @@ public String getJsonLd() { if (hideFilesBoolean != null && hideFilesBoolean.equals("true")) { // no-op } else { - if (FileUtil.isPubliclyDownloadable(fileMetadata)) { - String nullDownloadType = null; - fileObject.add("contentUrl", dataverseSiteUrl + FileUtil.getFileDownloadUrlPath(nullDownloadType, fileMetadata.getDataFile().getId(), false, fileMetadata.getId())); - } + String nullDownloadType = null; + fileObject.add("contentUrl", dataverseSiteUrl + FileUtil.getFileDownloadUrlPath(nullDownloadType, fileMetadata.getDataFile().getId(), false, fileMetadata.getId())); } fileArray.add(fileObject); } diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..f5bc5fd97d0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -181,7 +181,7 @@ public void 
testExportDataset() throws Exception { assertEquals(2, json2.getJsonArray("spatialCoverage").size()); assertEquals("DataDownload", json2.getJsonArray("distribution").getJsonObject(0).getString("@type")); assertEquals("README.md", json2.getJsonArray("distribution").getJsonObject(0).getString("name")); - assertEquals("text/plain", json2.getJsonArray("distribution").getJsonObject(0).getString("fileFormat")); + assertEquals("text/plain", json2.getJsonArray("distribution").getJsonObject(0).getString("encodingFormat")); assertEquals(1234, json2.getJsonArray("distribution").getJsonObject(0).getInt("contentSize")); assertEquals("README file.", json2.getJsonArray("distribution").getJsonObject(0).getString("description")); assertEquals("https://doi.org/10.5072/FK2/7V5MPI", json2.getJsonArray("distribution").getJsonObject(0).getString("@id")); From d93df2ae32f1b00a7283f298b6de54d9008cd5b6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 13:29:11 -0400 Subject: [PATCH 299/608] release note --- doc/release-notes/7349-3-schema.org-updates.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/7349-3-schema.org-updates.md diff --git a/doc/release-notes/7349-3-schema.org-updates.md b/doc/release-notes/7349-3-schema.org-updates.md new file mode 100644 index 00000000000..6a9c5e2b9b0 --- /dev/null +++ b/doc/release-notes/7349-3-schema.org-updates.md @@ -0,0 +1,3 @@ +The Schema.org metadata export and the schema.org metadata embedded in dataset pages have been updated to improve compliance with Schema.org's schema and Google's recommendations. + +Backward compatibility - file entries now have the mimetype reported as 'encodingFormat' rather than 'fileFormat' to better conform with the Schema.org specification for DataDownload entries. \ No newline at end of file From 0b375cfb38c5303c18045a80aaf347977414b8ef Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 17:43:55 -0400 Subject: [PATCH 300/608] add type for person/org, add sameas, fix affiliation --- .../harvard/iq/dataverse/DatasetVersion.java | 50 +++++++++---- .../iq/dataverse/util/PersonOrOrgUtil.java | 72 +++++++++++++++++++ 2 files changed, 107 insertions(+), 15 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..64371148254 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.util.MarkupChecker; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import edu.harvard.iq.dataverse.branding.BrandingUtil; @@ -1802,27 +1803,46 @@ public String getJsonLd() { for (DatasetAuthor datasetAuthor : this.getDatasetAuthors()) { JsonObjectBuilder author = Json.createObjectBuilder(); String name = datasetAuthor.getName().getDisplayValue(); + String identifierAsUrl = datasetAuthor.getIdentifierAsUrl(); DatasetField authorAffiliation = datasetAuthor.getAffiliation(); String affiliation = null; if (authorAffiliation != null) { affiliation = datasetAuthor.getAffiliation().getDisplayValue(); } - // We are aware of "givenName" and "familyName" but instead of a person it might be an organization such as "Gallup Organization".
- //author.add("@type", "Person"); - author.add("name", name); - // We are aware that the following error is thrown by https://search.google.com/structured-data/testing-tool - // "The property affiliation is not recognized by Google for an object of type Thing." - // Someone at Google has said this is ok. - // This logic could be moved into the `if (authorAffiliation != null)` block above. - if (!StringUtil.isEmpty(affiliation)) { - author.add("affiliation", affiliation); - } - String identifierAsUrl = datasetAuthor.getIdentifierAsUrl(); - if (identifierAsUrl != null) { - // It would be valid to provide an array of identifiers for authors but we have decided to only provide one. - author.add("@id", identifierAsUrl); - author.add("identifier", identifierAsUrl); + JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, (identifierAsUrl==null)); + String givenName= entity.getString("givenName"); + String familyName= entity.getString("familyName"); + + if (entity.getBoolean("isPerson")) { + // Person + author.add("@type", "Person"); + if (givenName != null) { + author.add("givenName", givenName); + } + if (familyName != null) { + author.add("familyName", familyName); + } + if (!StringUtil.isEmpty(affiliation)) { + author.add("affiliation", Json.createObjectBuilder().add("@type", "Organization").add("name", affiliation)); + } + //Currently all possible identifier URLs are for people not Organizations + if(identifierAsUrl != null) { + author.add("sameas", identifierAsUrl); + //Legacy - not sure if these are still useful + author.add("@id", identifierAsUrl); + author.add("identifier", identifierAsUrl); + + } + } else { + // Organization + author.add("@type", "Organization"); + if (!StringUtil.isEmpty(affiliation)) { + author.add("parentOrganization", Json.createObjectBuilder().add("@type", "Organization").add("name", affiliation)); + } } + // Both cases + author.add("name", entity.getString("name")); + //And add to the array authors.add(author); } JsonArray authorsArray = authors.build(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java new file mode 100644 index 00000000000..8d767d2e535 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -0,0 +1,72 @@ +package edu.harvard.iq.dataverse.util; + +import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; + +import edu.harvard.iq.dataverse.export.openaire.Cleanup; +import edu.harvard.iq.dataverse.export.openaire.FirstNames; +import edu.harvard.iq.dataverse.export.openaire.Organizations; +import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; + +/** + * + * @author qqmyers + * + * Adapted from earlier code in OpenAireExportUtil + * + * Implements an algorithm derived from code at DataCite to determine + * whether a name is that of a Person or Organization and, if the + * former, to pull out the given and family names. 
+ */ + +public class PersonOrOrgUtil { + + public static JsonObject getPersonOrOrganization(String name, boolean organizationIfTied) { + name = Cleanup.normalize(name); + + String givenName = null; + String familyName = null; + // Datacite algorithm, + // https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 + boolean isOrganization = Organizations.getInstance().isOrganization(name); + if (name.contains(",")) { + givenName = FirstNames.getInstance().getFirstName(name); + // contributorName=<FamilyName>, <FirstName> + if (givenName != null && !isOrganization) { + // givenName ok + isOrganization = false; + // contributor_map.put("nameType", "Personal"); + if (!name.replaceFirst(",", "").contains(",")) { + // contributorName=<FamilyName>, <FirstName> + String[] fullName = name.split(", "); + givenName = fullName[1]; + familyName = fullName[0]; + } + } else if (isOrganization || organizationIfTied) { + isOrganization = true; + } + + } else { + givenName = FirstNames.getInstance().getFirstName(name); + + if (givenName != null && !isOrganization) { + isOrganization = false; + if (givenName.length() + 1 < name.length()) { + familyName = name.substring(givenName.length() + 1); + } + } else { + // default + if (isOrganization || organizationIfTied) { + isOrganization = true; + } + } + } + JsonObjectBuilder job = new NullSafeJsonBuilder(); + job.add("fullname", name); + job.add("givenName", givenName); + job.add("familyName", familyName); + job.add("isPerson", !isOrganization); + return job.build(); + + } +} From 5bd58d8f4390fc4eed31ba8f64835b527ca939fb Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 18:02:01 -0400 Subject: [PATCH 301/608] typo --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 64371148254..2aca5cc9705 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1841,7 +1841,7 @@ public String getJsonLd() { } // Both cases - author.add("name", entity.getString("name")); + author.add("name", entity.getString("fullname")); //And add to the array authors.add(author); } From 63cd77d2a834221889125fbda952bd193e44d099 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 13:34:46 -0400 Subject: [PATCH 302/608] capitalization --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 2aca5cc9705..8aaf0d2fd89 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1827,7 +1827,7 @@ public String getJsonLd() { } //Currently all possible identifier URLs are for people not Organizations if(identifierAsUrl != null) { - author.add("sameas", identifierAsUrl); + author.add("sameAs", identifierAsUrl); //Legacy - not sure if these are still useful author.add("@id", identifierAsUrl); author.add("identifier", identifierAsUrl); From 8084fb8796700eb56fdbcc17a6b792946875a5f0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 13:37:45 -0400 Subject: [PATCH 303/608] update tests --- .../iq/dataverse/export/SchemaDotOrgExporterTest.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git
a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..2327de43ca4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -137,13 +137,15 @@ public void testExportDataset() throws Exception { assertEquals("https://doi.org/10.5072/FK2/IMK5A4", json2.getString("identifier")); assertEquals("Darwin's Finches", json2.getString("name")); assertEquals("Finch, Fiona", json2.getJsonArray("creator").getJsonObject(0).getString("name")); - assertEquals("Birds Inc.", json2.getJsonArray("creator").getJsonObject(0).getString("affiliation")); + assertEquals("Birds Inc.", json2.getJsonArray("creator").getJsonObject(0).getJsonObject("affiliation").getString("name")); assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("creator").getJsonObject(0).getString("@id")); assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("creator").getJsonObject(0).getString("identifier")); + assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("creator").getJsonObject(0).getString("sameAs")); assertEquals("Finch, Fiona", json2.getJsonArray("author").getJsonObject(0).getString("name")); - assertEquals("Birds Inc.", json2.getJsonArray("author").getJsonObject(0).getString("affiliation")); + assertEquals("Birds Inc.", json2.getJsonArray("author").getJsonObject(0).getJsonObject("affiliation").getString("name")); assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("author").getJsonObject(0).getString("@id")); assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("author").getJsonObject(0).getString("identifier")); + assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("author").getJsonObject(0).getString("sameAs")); assertEquals("1955-11-05", json2.getString("datePublished")); assertEquals("1955-11-05", json2.getString("dateModified")); assertEquals("1", json2.getString("version")); From 489d0e36e9b81c095b7387522d95b92516c00b69 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 13:40:43 -0400 Subject: [PATCH 304/608] legacy test issue --- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index 2327de43ca4..68bab7c8bb4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -170,7 +170,7 @@ public void testExportDataset() throws Exception { assertEquals("LibraScholar", json2.getJsonObject("includedInDataCatalog").getString("name")); assertEquals("https://librascholar.org", json2.getJsonObject("includedInDataCatalog").getString("url")); assertEquals("Organization", json2.getJsonObject("publisher").getString("@type")); - assertEquals("LibraScholar", json2.getJsonObject("provider").getString("name")); + assertEquals("LibraScholar", json2.getJsonObject("publisher").getString("name")); assertEquals("Organization", json2.getJsonObject("provider").getString("@type")); assertEquals("LibraScholar", json2.getJsonObject("provider").getString("name")); assertEquals("Organization", 
json2.getJsonArray("funder").getJsonObject(0).getString("@type")); From c3260a5009c99f0765d012d9ce20ef27048cb738 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 17:41:21 -0400 Subject: [PATCH 305/608] change fullname -> fullName --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- .../java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 8aaf0d2fd89..8e9a0950b2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1841,7 +1841,7 @@ public String getJsonLd() { } } // Both cases - author.add("name", entity.getString("fullname")); + author.add("name", entity.getString("fullName")); //And add to the array authors.add(author); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index 8d767d2e535..add5c8285ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -62,7 +62,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } } JsonObjectBuilder job = new NullSafeJsonBuilder(); - job.add("fullname", name); + job.add("fullName", name); job.add("givenName", givenName); job.add("familyName", familyName); job.add("isPerson", !isOrganization); From 3ddc7960f24a63bf322d90befd71c3c440ab3101 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 17:41:31 -0400 Subject: [PATCH 306/608] note todos --- .../export/openaire/OpenAireExportUtil.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 49fe203b96d..bea3858a60e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -256,7 +256,10 @@ public static void writeCreatorsElement(XMLStreamWriter xmlw, DatasetVersionDTO creator_map.put("nameType", "Personal"); nameType_check = true; } - + // ToDo - the algorithm to determine if this is a Person or Organization here + // has been abstracted into a separate + // edu.harvard.iq.dataverse.util.PersonOrOrgUtil class that could be used here + // to avoid duplication/variants of the algorithm creatorName = Cleanup.normalize(creatorName); // Datacite algorithm, https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 if (creatorName.contains(",")) { @@ -706,6 +709,11 @@ public static void writeContributorElement(XMLStreamWriter xmlw, String contribu boolean nameType_check = false; Map contributor_map = new HashMap(); + // ToDo - the algorithm to determine if this is a Person or Organization here + // has been abstracted into a separate + // edu.harvard.iq.dataverse.util.PersonOrOrgUtil class that could be used here + // to avoid duplication/variants of the algorithm + contributorName = Cleanup.normalize(contributorName); // Datacite algorithm, https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 if (contributorName.contains(",")) { @@ -717,6 +725,9 @@ public static void writeContributorElement(XMLStreamWriter xmlw, String contribu 
// givenName ok contributor_map.put("nameType", "Personal"); nameType_check = true; + // re: the above ToDo - the ("ContactPerson".equals(contributorType) && + // !isValidEmailAddress(contributorName)) clause in the next line could/should + // be sent as the OrgIfTied boolean parameter } else if (isOrganization || ("ContactPerson".equals(contributorType) && !isValidEmailAddress(contributorName))) { contributor_map.put("nameType", "Organizational"); } From 05ea63aa98a7c896fbfbbfa00eb4c6755bd317ad Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 17:42:27 -0400 Subject: [PATCH 307/608] add tests same examples as in OrganizationTest but using the extracted algorithm and also checking given/family name in relevant cases --- .../dataverse/util/PersonOrOrgUtilTest.java | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java diff --git a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java new file mode 100644 index 00000000000..32c72e9497c --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java @@ -0,0 +1,92 @@ +package edu.harvard.iq.dataverse.util; + +import edu.harvard.iq.dataverse.export.openaire.Organizations; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + +import org.junit.Ignore; +import org.junit.Test; +import static org.junit.Assert.*; + +import javax.json.JsonObject; + +public class PersonOrOrgUtilTest { + + public PersonOrOrgUtilTest() { + } + + @Test + public void testOrganizationSimpleName() { + verifyIsOrganization("IBM"); + verifyIsOrganization("Harvard University"); + } + + @Test + public void testOrganizationCOMPLEXName() { + verifyIsOrganization("The Institute for Quantitative Social Science"); + verifyIsOrganization("Council on Aging"); + verifyIsOrganization("The Ford Foundation"); + verifyIsOrganization("United Nations Economic and Social Commission for Asia and the Pacific (UNESCAP)"); + verifyIsOrganization("Michael J. Fox Foundation for Parkinson's Research"); + } + + @Test + public void testOrganizationComaOrDash() { + verifyIsOrganization("Digital Archive of Massachusetts Anti-Slavery and Anti-Segregation Petitions, Massachusetts Archives, Boston MA"); + verifyIsOrganization("U.S. Department of Commerce, Bureau of the Census, Geography Division"); + verifyIsOrganization("Harvard Map Collection, Harvard College Library"); + verifyIsOrganization("Geographic Data Technology, Inc. (GDT)"); + } + + @Ignore + @Test + public void testOrganizationES() { + //Spanish recognition is not enabled - see export/Organization.java + verifyIsOrganization("Compañía de San Fernando"); + } + + /** + * Name is composed of: + * <First Names> <Family Name> + */ + @Test + public void testName() { + verifyIsPerson("Jorge Mario Bergoglio", "Jorge Mario", "Bergoglio"); + verifyIsPerson("Bergoglio", null, null); + verifyIsPerson("Francesco Cadili", "Francesco", "Cadili"); + // This Philip Seymour Hoffman example is from ShibUtilTest.
+ verifyIsPerson("Philip Seymour Hoffman", "Philip Seymour", "Hoffman"); + + // test Smith (is also a name) + verifyIsPerson("John Smith", "John", "Smith"); + // resolved using hint file + verifyIsPerson("Guido van Rossum", "Guido", "van Rossum"); + // test only name + verifyIsPerson("Francesco", "Francesco", null); + // test only family name + verifyIsPerson("Cadili", null, null); + } + + private void verifyIsOrganization(String fullName) { + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false); + System.out.println(JsonUtil.prettyPrint(obj)); + assertEquals(obj.getString("fullName"),fullName); + assertFalse(obj.getBoolean("isPerson")); + + } + + private void verifyIsPerson(String fullName, String givenName, String familyName) { + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false); + System.out.println(JsonUtil.prettyPrint(obj)); + assertEquals(obj.getString("fullName"),fullName); + assertTrue(obj.getBoolean("isPerson")); + assertEquals(obj.containsKey("givenName"), givenName != null); + if(obj.containsKey("givenName") && givenName != null) { + assertEquals(obj.getString("givenName"),givenName); + } + assertEquals(obj.containsKey("familyName"), familyName != null); + if(obj.containsKey("familyName") && familyName != null) { + assertEquals(obj.getString("familyName"),familyName); + } + } + + } From 6ca9f7099698bcfe08f6fbc98379f3f989d6a283 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 17:44:02 -0400 Subject: [PATCH 308/608] don't send giveName for orgs it does not appear to be useful given the tests in PersonOrOrgUtilTest --- .../java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index add5c8285ae..468949e8a40 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -44,6 +44,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } } else if (isOrganization || organizationIfTied) { isOrganization = true; + givenName=null; } } else { @@ -58,6 +59,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati // default if (isOrganization || organizationIfTied) { isOrganization = true; + givenName=null; } } } From a5fafd079d64ed334fa45fa238765815bd262f05 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 17:48:12 -0400 Subject: [PATCH 309/608] release note --- doc/release-notes/7349-4-schema.org-updates.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/7349-4-schema.org-updates.md diff --git a/doc/release-notes/7349-4-schema.org-updates.md b/doc/release-notes/7349-4-schema.org-updates.md new file mode 100644 index 00000000000..1247471f137 --- /dev/null +++ b/doc/release-notes/7349-4-schema.org-updates.md @@ -0,0 +1,3 @@ +The Schema.org metadata export and the schema.org metadata embedded in dataset pages has been updated to improve compliance with Schema.org's schema and Google's recommendations. 
+ +Backward compatibility - author/creators now have an @type of Person or Organization and any affiliation (affiliation for Person, parentOrganization for Organization) is now an object of @type Organization \ No newline at end of file From f222160d16705b99f1e942037fc68828732f9934 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 20 Oct 2022 18:06:22 -0400 Subject: [PATCH 310/608] bugfix for no givenName/familyName from algorithm --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 8e9a0950b2a..278ab246fcf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1810,8 +1810,8 @@ public String getJsonLd() { affiliation = datasetAuthor.getAffiliation().getDisplayValue(); } JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, (identifierAsUrl==null)); - String givenName= entity.getString("givenName"); - String familyName= entity.getString("familyName"); + String givenName= entity.containsKey("givenName") ? entity.getString("givenName"):null; + String familyName= entity.containsKey("familyName")? entity.getString("familyName"):null; if (entity.getBoolean("isPerson")) { // Person From 6e37e0eb05a856520b259572bb0dca3ef96a4e49 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Fri, 21 Oct 2022 13:09:01 +0200 Subject: [PATCH 311/608] Added controlled vocab bool and values to JSON --- .../edu/harvard/iq/dataverse/util/json/JsonPrinter.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 1b7a52b1ea5..a8c3013c2ef 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -551,6 +551,14 @@ public static JsonObjectBuilder json(DatasetFieldType fld) { fieldsBld.add("watermark", fld.getWatermark()); fieldsBld.add("description", fld.getDescription()); fieldsBld.add("multiple", fld.isAllowMultiples()); + fieldsBld.add("isControlledVocabulary", fld.isControlledVocabulary()); + if (fld.isControlledVocabulary()) { + // If the field has a controlled vocabulary, + // add all of its values to the resulting JSON as an array + JsonArrayBuilder cvocValues = Json.createArrayBuilder(); + fld.getControlledVocabularyValues().forEach(cvv -> cvocValues.add(cvv.getStrValue())); + fieldsBld.add("controlledVocabularyValues", cvocValues); + } if (!fld.getChildDatasetFieldTypes().isEmpty()) { JsonObjectBuilder subFieldsBld = jsonObjectBuilder(); for (DatasetFieldType subFld : fld.getChildDatasetFieldTypes()) { From 41c30d9de4970b57f8547dbb443d594aefc92e9e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 21 Oct 2022 15:22:30 -0400 Subject: [PATCH 312/608] add assumeCommaInPersonName and tests --- .../harvard/iq/dataverse/DatasetVersion.java | 2 +- .../iq/dataverse/util/PersonOrOrgUtil.java | 51 ++++++++++++++----- .../dataverse/util/PersonOrOrgUtilTest.java | 14 ++++- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 278ab246fcf..1204d1dd4f1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1811,7 +1811,7 @@ public String getJsonLd() { } JsonObject entity =
PersonOrOrgUtil.getPersonOrOrganization(name, (identifierAsUrl==null)); String givenName= entity.containsKey("givenName") ? entity.getString("givenName"):null; - String familyName= entity.containsKey("familyName")? entity.getString("familyName"):null; + String familyName= entity.containsKey("familyName") ? entity.getString("familyName"):null; if (entity.getBoolean("isPerson")) { // Person diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index 468949e8a40..b8089422fcd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -17,18 +17,37 @@ * Implements an algorithm derived from code at DataCite to determine * whether a name is that of a Person or Organization and, if the * former, to pull out the given and family names. + * + * Adds a parameter that can improve accuracy, e.g. for curated + * repositories, allowing the code to assume that all Person entries are + * in <FamilyName>, <GivenName> order. + * + * Possible ToDo - one could also allow local configuration of specific + * words that will automatically categorize one-off cases that the + * algorithm would otherwise mis-categorize. For example, the code + * appears to not recognize names ending in "Project" as an + * Organization. + * */ public class PersonOrOrgUtil { + static boolean assumeCommaInPersonName = false; + + static { + setAssumeCommaInPersonName(Boolean.parseBoolean(System.getProperty("dataverse.personOrOrg.assumeCommaInPersonName", "false"))); + } + public static JsonObject getPersonOrOrganization(String name, boolean organizationIfTied) { name = Cleanup.normalize(name); String givenName = null; String familyName = null; - // Datacite algorithm, + // adapted from a Datacite algorithm, + // https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 boolean isOrganization = Organizations.getInstance().isOrganization(name); + // ToDo - could add a check of stop words to handle problem cases, i.e.
if name + // contains something in that list, it is an org if (name.contains(",")) { givenName = FirstNames.getInstance().getFirstName(name); // contributorName=<FamilyName>, <FirstName> if (givenName != null && !isOrganization) { @@ -48,18 +67,21 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } } else { - givenName = FirstNames.getInstance().getFirstName(name); - - if (givenName != null && !isOrganization) { - isOrganization = false; - if (givenName.length() + 1 < name.length()) { - familyName = name.substring(givenName.length() + 1); - } + if (assumeCommaInPersonName) { + isOrganization = true; } else { - // default - if (isOrganization || organizationIfTied) { - isOrganization = true; - givenName=null; + givenName = FirstNames.getInstance().getFirstName(name); + + if (givenName != null && !isOrganization) { + isOrganization = false; + if (givenName.length() + 1 < name.length()) { + familyName = name.substring(givenName.length() + 1); + } + } else { + // default + if (isOrganization || organizationIfTied) { + isOrganization = true; + } } } } @@ -71,4 +93,9 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati return job.build(); } + + public static void setAssumeCommaInPersonName(boolean assume) { + assumeCommaInPersonName = assume; + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java index 32c72e9497c..dbda622b536 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java @@ -30,7 +30,19 @@ public void testOrganizationCOMPLEXName() { } @Test - public void testOrganizationComaOrDash() { + public void testOrganizationAcademicName() { + + verifyIsOrganization("John Smith Center"); + verifyIsOrganization("John Smith Group"); + //An example the base algorithm doesn't handle: + PersonOrOrgUtil.setAssumeCommaInPersonName(true); + verifyIsOrganization("John Smith Project"); + PersonOrOrgUtil.setAssumeCommaInPersonName(false); + } + + + @Test + public void testOrganizationCommaOrDash() { verifyIsOrganization("Digital Archive of Massachusetts Anti-Slavery and Anti-Segregation Petitions, Massachusetts Archives, Boston MA"); verifyIsOrganization("U.S.
Department of Commerce, Bureau of the Census, Geography Division"); verifyIsOrganization("Harvard Map Collection, Harvard College Library"); From 26e9861fe586503b4bd485cdb1c5d1b00fd7662b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 21 Oct 2022 16:48:11 -0400 Subject: [PATCH 313/608] Add dvwebloader as upload option --- .../edu/harvard/iq/dataverse/DatasetPage.java | 27 ++++++++++++++++++- .../harvard/iq/dataverse/SettingsWrapper.java | 9 +++++++ .../settings/SettingsServiceBean.java | 4 ++- .../iq/dataverse/util/SystemConfig.java | 12 ++++++++- src/main/java/propertyFiles/Bundle.properties | 4 +++ src/main/webapp/editFilesFragment.xhtml | 6 ++++- src/main/webapp/resources/css/structure.css | 1 + 7 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0a8db69bf5b..750636fec45 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -56,6 +56,7 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.validation.URLValidator; import edu.harvard.iq.dataverse.workflows.WorkflowComment; @@ -1845,7 +1846,9 @@ public boolean globusUploadSupported() { return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } - + public boolean webloaderUploadSupported() { + return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); + } private String init(boolean initFull) { @@ -6062,4 +6065,26 @@ public void startGlobusTransfer() { } PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken)); } + + public String getWebloaderUrlForDataset(Dataset d) { + String localeCode = session.getLocaleCode(); + ApiToken apiToken = null; + User user = session.getUser(); + + if (user instanceof AuthenticatedUser) { + apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); + + if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { + logger.fine("Created apiToken for user: " + user.getIdentifier()); + apiToken = authService.generateApiTokenForUser((AuthenticatedUser) user); + } + } + // Use URLTokenUtil for params currently in common with external tools. 
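+        // For illustration (hypothetical values), after replaceTokensWithValues the
+        // redirect built below looks like:
+        //   {WebloaderUrl}?datasetPid=doi:10.5072/FK2/ABC123&siteUrl=https://demo.example.edu
+        //     &key=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx&datasetId=42&datasetVersion=1.0&dvLocale=en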
+ URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); + String appUrl; + appUrl = settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl) + + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + return tokenUtil.replaceTokensWithValues(appUrl); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index aa40423000d..bf36f265743 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -107,6 +107,8 @@ public class SettingsWrapper implements java.io.Serializable { private Boolean rsyncOnly = null; + private Boolean webloaderUpload = null; + private String metricsUrl = null; private Boolean dataFilePIDSequentialDependent = null; @@ -338,6 +340,13 @@ public String getGlobusAppUrl() { } + public boolean isWebloaderUpload() { + if (webloaderUpload == null) { + webloaderUpload = systemConfig.isWebloaderUpload(); + } + return webloaderUpload; + } + public boolean isRsyncOnly() { if (rsyncOnly == null) { String downloadMethods = getValueForKey(SettingsServiceBean.Key.DownloadMethods); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 50e29d2a333..371463fb215 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -563,7 +563,9 @@ Whether Harvesting (OAI) service is enabled /* * Allow a custom JavaScript to control values of specific fields. */ - ControlledVocabularyCustomJavaScript + ControlledVocabularyCustomJavaScript, + + WebloaderUrl ; @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 7abd0d02065..62dcbfc8ab0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -862,7 +862,13 @@ public enum FileUploadMethods { * Upload through Globus of large files */ - GLOBUS("globus") + GLOBUS("globus"), + + /** + * Upload folders of files through dvwebloader app + */ + + WEBLOADER("dvwebloader"); ; @@ -999,6 +1005,10 @@ public boolean isRsyncUpload(){ public boolean isGlobusUpload(){ return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), true); } + + public boolean isWebloaderUpload(){ + return getMethodAvailable(FileUploadMethods.WEBLOADER.toString(), true); + } // Controls if HTTP upload is enabled for both GUI and API. public boolean isHTTPUpload(){ diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 8a4fdeb9e28..1019ec5d3e8 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1671,6 +1671,10 @@ file.finishGlobus=Globus Transfer has finished file.downloadFromGlobus=Download through Globus file.globus.transfer=Globus Transfer file.globus.of=of: +file.fromWebloader.tip=Upload a folder of files. This method retains the relative path structure from your local machine. (Using it will cancel any other types of uploads in progress on this page.) +file.fromWebloaderAfterCreate.tip=This option will be enabled after this dataset is created.
+file.fromWebloader=Upload a Folder + file.api.httpDisabled=File upload via HTTP is not available for this installation of Dataverse. file.api.alreadyHasPackageFile=File upload via HTTP disabled since this dataset already contains a package file. file.replace.original=Original File diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 0fd5bf48fb7..40937008ab8 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -158,7 +158,11 @@ widgetVar="fileUploadWidget"> - +
+

#{bundle['file.webloader.tip']}

+

#{bundle['file.webloaderAfterCreate.tip']}

+ +

#{bundle['file.fromDropbox.tip']}

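The upload widget wired in above only appears when the new method is actually switched on. As a rough sketch (not part of the patch), the isWebloaderUpload() check added in SystemConfig amounts to scanning the comma-separated :UploadMethods setting; the setting value shown below is an illustrative assumption:

    // Simplified sketch of getMethodAvailable(FileUploadMethods.WEBLOADER.toString(), true),
    // assuming :UploadMethods holds a list such as "native/http,dvwebloader".
    String uploadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.UploadMethods);
    boolean webloaderEnabled = uploadMethods != null
            && java.util.Arrays.asList(uploadMethods.toLowerCase().split(","))
                    .contains(FileUploadMethods.WEBLOADER.toString()); // "dvwebloader"

On the page itself, webloaderUploadSupported() additionally requires direct upload to be enabled for the dataset's effective storage driver.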
+ diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index c184c46cee9..65489431d65 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -883,6 +883,7 @@ div.panel-body.read-terms{max-height:220px; overflow-y:scroll; width:100%; backg #dragdropMsg {padding:20px;font-size:1.3em;color:#808080;text-align:center;} .dropin-btn-status.ui-icon {background: url("https://www.dropbox.com/static/images/widgets/dbx-saver-status.png") no-repeat;} .globus-btn.ui-icon {background: url("https://docs.globus.org/images/home/transfer.png") no-repeat;background-size:contain;display:inline-block;} +.webloader-btn.ui-icon {background: url("resources/images/folders.png") no-repeat;background-size:contain;display:inline-block;} /* VERSIONS */ From d5d365589f627bedd529cbb93be5af33ae63e560 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 21 Oct 2022 17:02:44 -0400 Subject: [PATCH 314/608] update docs/release note --- doc/release-notes/7349-4-schema.org-updates.md | 2 ++ doc/sphinx-guides/source/installation/config.rst | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/release-notes/7349-4-schema.org-updates.md b/doc/release-notes/7349-4-schema.org-updates.md index 1247471f137..2c78243dc29 100644 --- a/doc/release-notes/7349-4-schema.org-updates.md +++ b/doc/release-notes/7349-4-schema.org-updates.md @@ -1,3 +1,5 @@ The Schema.org metadata export and the schema.org metadata embedded in dataset pages has been updated to improve compliance with Schema.org's schema and Google's recommendations. +New jvm-option: dataverse.personOrOrg.assumeCommaInPersonName, default is false + Backward compatibility - author/creators now have an @type of Person or Organization and any affiliation (affiliation for Person, parentOrganization for Organization) is now an object of @type Organization \ No newline at end of file diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f2de9d5702f..3e01f372c9b 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1627,8 +1627,6 @@ By default, download URLs to files will be included in Schema.org JSON-LD output ``./asadmin create-jvm-options '-Ddataverse.files.hide-schema-dot-org-download-urls=true'`` -Please note that there are other reasons why download URLs may not be included for certain files such as if a guestbook entry is required or if the file is restricted. - For more on Schema.org JSON-LD, see the :doc:`/admin/metadataexport` section of the Admin Guide. .. _useripaddresssourceheader: @@ -1658,6 +1656,14 @@ This setting is useful in cases such as running your Dataverse installation behi "HTTP_FORWARDED", "HTTP_VIA", "REMOTE_ADDR" + +dataverse.personOrOrg.assumeCommaInPersonName ++++++++++++++++++++++++++++++++++++++++++++++ + +Please note that this setting is experimental. + +The Schema.org metadata export and the Schema.org metadata included in dataset pages try to infer whether each entry in the Author field is a Person or an Organization. If you are sure that +users are following the guidance to add people in the recommended family name, given name order, with a comma, you can set this to true so that entries without a comma are always assumed to be Organizations. The default is false. ..
_:ApplicationServerSettings: From 3d647f43aa2021121c5a45b94ec4697cfcc0b6e5 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 24 Oct 2022 20:37:54 -0400 Subject: [PATCH 315/608] move hard coded strings to SearchFields class #8239 --- .../edu/harvard/iq/dataverse/search/IndexServiceBean.java | 4 ++-- .../java/edu/harvard/iq/dataverse/search/SearchFields.java | 5 +++++ .../edu/harvard/iq/dataverse/search/SearchServiceBean.java | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 6c4fb3f1332..8bd3f7f443d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1003,12 +1003,12 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Mon, 24 Oct 2022 22:03:38 -0400 Subject: [PATCH 316/608] add geospatial search test #8239 --- .../harvard/iq/dataverse/api/SearchIT.java | 154 +++++++++++++++++- .../edu/harvard/iq/dataverse/api/UtilIT.java | 22 +++ 2 files changed, 175 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index 789b60a34e7..0f2c77de717 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -36,6 +36,7 @@ import org.junit.After; import static org.junit.Assert.assertNotEquals; import static java.lang.Thread.sleep; +import javax.json.JsonObjectBuilder; public class SearchIT { @@ -1084,7 +1085,158 @@ public void testSubtreePermissions() { .statusCode(OK.getStatusCode()) .body("data.total_count", CoreMatchers.equalTo(1)); } - + + @Test + public void testGeospatialSearch() { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response setMetadataBlocks = UtilIT.setMetadataBlocks(dataverseAlias, Json.createArrayBuilder().add("citation").add("geospatial"), apiToken); + setMetadataBlocks.prettyPrint(); + setMetadataBlocks.then().assertThat().statusCode(OK.getStatusCode()); + + JsonObjectBuilder datasetJson = Json.createObjectBuilder() + .add("datasetVersion", Json.createObjectBuilder() + .add("metadataBlocks", Json.createObjectBuilder() + .add("citation", Json.createObjectBuilder() + .add("fields", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("typeName", "title") + .add("value", "Dataverse HQ") + .add("typeClass", "primitive") + .add("multiple", false) + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("authorName", + Json.createObjectBuilder() + .add("value", "Simpson, Homer") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "authorName")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "author") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("datasetContactEmail", + Json.createObjectBuilder() + .add("value", "hsimpson@mailinator.com") 
+ .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "datasetContactEmail")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "datasetContact") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("dsDescriptionValue", + Json.createObjectBuilder() + .add("value", "Headquarters for Dataverse.") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "dsDescriptionValue")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "dsDescription") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add("Other") + ) + .add("typeClass", "controlledVocabulary") + .add("multiple", true) + .add("typeName", "subject") + ) + ) + ) + .add("geospatial", Json.createObjectBuilder() + .add("fields", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("typeName", "geographicBoundingBox") + .add("typeClass", "compound") + .add("multiple", true) + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + // The box is roughly on Cambridge, MA + // See https://linestrings.com/bbox/#-71.187346,42.33661,-71.043056,42.409599 + .add("westLongitude", + Json.createObjectBuilder() + .add("value", "-71.187346") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "westLongitude") + ) + .add("southLongitude", + Json.createObjectBuilder() + .add("value", "42.33661") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "southLongitude") + ) + .add("eastLongitude", + Json.createObjectBuilder() + .add("value", "-71.043056") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "eastLongitude") + ) + .add("northLongitude", + Json.createObjectBuilder() + .add("value", "42.409599") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "northLongitude") + ) + ) + ) + ) + ) + ) + )); + + Response createDatasetResponse = UtilIT.createDataset(dataverseAlias, datasetJson, apiToken); + createDatasetResponse.prettyPrint(); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + String datasetPid = JsonPath.from(createDatasetResponse.getBody().asString()).getString("data.persistentId"); + + // Plymouth rock (41.9580775,-70.6621063) is within 50 km of Cambridge. Hit. + Response search1 = UtilIT.search("id:dataset_" + datasetId + "_draft", apiToken, "&show_entity_ids=true&geo_point=41.9580775,-70.6621063&geo_radius=50"); + search1.prettyPrint(); + search1.then().assertThat() + .body("data.total_count", CoreMatchers.is(1)) + .body("data.count_in_response", CoreMatchers.is(1)) + .body("data.items[0].entity_id", CoreMatchers.is(datasetId)) + .statusCode(OK.getStatusCode()); + + // Plymouth rock (41.9580775,-70.6621063) is not within 1 km of Cambridge. Miss. 
+ Response search2 = UtilIT.search("id:dataset_" + datasetId + "_draft", apiToken, "&geo_point=41.9580775,-70.6621063&geo_radius=1"); + search2.prettyPrint(); + search2.then().assertThat() + .body("data.total_count", CoreMatchers.is(0)) + .body("data.count_in_response", CoreMatchers.is(0)) + .statusCode(OK.getStatusCode()); + + } + @After public void tearDownDataverse() { File treesThumb = new File("scripts/search/data/binary/trees.png.thumb48"); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 7107ee783d7..3bffdaf5188 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -407,6 +407,20 @@ static Response createDatasetViaNativeApi(String dataverseAlias, String pathToJs return createDatasetResponse; } + static Response createDataset(String dataverseAlias, JsonObjectBuilder datasetJson, String apiToken) { + return createDataset(dataverseAlias, datasetJson.build().toString(), apiToken); + } + + static Response createDataset(String dataverseAlias, String datasetJson, String apiToken) { + System.out.println("creating with " + datasetJson); + Response createDatasetResponse = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(datasetJson) + .contentType("application/json") + .post("/api/dataverses/" + dataverseAlias + "/datasets"); + return createDatasetResponse; + } + static String getDatasetJson(String pathToJsonFile) { File datasetVersionJson = new File(pathToJsonFile); try { @@ -544,6 +558,14 @@ static Response loadMetadataBlock(String apiToken, byte[] body) { .post("/api/admin/datasetfield/load"); } + static Response setMetadataBlocks(String dataverseAlias, JsonArrayBuilder blocks, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .body(blocks.build().toString()) + .post("/api/dataverses/" + dataverseAlias + "/metadatablocks"); + } + static private String getDatasetXml(String title, String author, String description) { String nullLicense = null; String nullRights = null; From eca4c2d57ac92a07c3be19ea2676f59b776952c8 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 25 Oct 2022 17:38:34 +0200 Subject: [PATCH 317/608] fix for the test with license sorting --- src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java index e189336b61e..50d3c5b34ea 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java @@ -91,7 +91,8 @@ public void testLicenses(){ getLicensesResponse.prettyPrint(); body = getLicensesResponse.getBody().asString(); status = JsonPath.from(body).getString("status"); - long licenseId = JsonPath.from(body).getLong("data[-1].id"); + //Last added license; the one with the highest id + long licenseId = JsonPath.from(body).getList("data[*].id").stream().max((x, y) -> Long.compare(x, y)).get(); //Assumes the first license is active, which should be true on a test server long activeLicenseId = JsonPath.from(body).getLong("data[0].id"); assertEquals("OK", status); From e5187b2af4e1915f5eff4575c6b2ccadd62d150a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 25 Oct 2022 14:28:40 -0400 Subject: [PATCH 318/608] Avoid DatasetCreate exception with only one coordinate #8239 With only
westLongitude added and the other three coordinates left empty, we were getting the following exception. A null check was added to prevent this. Command [DatasetCreate dataset:132] failed: Exception thrown from bean: javax.ejb.EJBTransactionRolledbackException: Exception thrown from bean: org.apache.solr.client.solrj.impl.HttpSolrClient$RemoteSolrException: Error from server at http://localhost:8983/solr/collection1: ERROR: [doc=dataset_132_draft] Error adding field 'solr_bboxtype'='ENVELOPE(null,null,null,null)' msg=Unable to parse shape given formats "lat,lon", "x y" or as WKT because java.text.ParseException: Expected a number input: ENVELOPE(null,null,null,null) --- .../edu/harvard/iq/dataverse/search/IndexServiceBean.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 8bd3f7f443d..f5a5570c831 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1008,7 +1008,10 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Tue, 25 Oct 2022 14:38:12 -0400 Subject: [PATCH 319/608] bug fixes, refactor, styling --- .../edu/harvard/iq/dataverse/DatasetPage.java | 22 ++++------- .../iq/dataverse/EditDatafilesPage.java | 22 +++++++++++ .../AuthenticationServiceBean.java | 10 +++++ .../iq/dataverse/util/WebloaderUtil.java | 36 ++++++++++++++++++ src/main/webapp/dataset.xhtml | 1 + src/main/webapp/editFilesFragment.xhtml | 15 ++++---- src/main/webapp/editdatafiles.xhtml | 1 + src/main/webapp/resources/css/structure.css | 2 +- src/main/webapp/resources/images/folders.png | Bin 0 -> 787 bytes 9 files changed, 87 insertions(+), 22 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java create mode 100644 src/main/webapp/resources/images/folders.png diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 750636fec45..05069d34c67 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -57,6 +57,7 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.WebloaderUtil; import edu.harvard.iq.dataverse.validation.URLValidator; import edu.harvard.iq.dataverse.workflows.WorkflowComment; @@ -6068,23 +6069,16 @@ public void startGlobusTransfer() { public String getWebloaderUrlForDataset(Dataset d) { String localeCode = session.getLocaleCode(); - ApiToken apiToken = null; User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - - if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { - logger.fine("Created apiToken for user: " + user.getIdentifier()); - apiToken = authService.generateApiTokenForUser((AuthenticatedUser) user); - } + if (user instanceof AuthenticatedUser) { + apiToken = authService.getValidApiTokenForUser((AuthenticatedUser) user); + return WebloaderUtil.getWebloaderUrl(d, apiToken, localeCode, + settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl)); + } else { + // Shouldn't normally happen (session timeout? bug?)
+ logger.warning("getWebloaderUrlForDataset called for non-Authenticated user"); + return null; + } - // Use URLTokenUtil for params currently in common with external tools. - URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); - String appUrl; - appUrl = settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl) - + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; - return tokenUtil.replaceTokensWithValues(appUrl); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 6cf294ffd6d..9845fa16526 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -5,7 +5,9 @@ import edu.harvard.iq.dataverse.api.AbstractApiBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; @@ -36,6 +38,8 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.WebloaderUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.EjbUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -3067,6 +3071,10 @@ public boolean globusUploadSupported() { return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } + + public boolean webloaderUploadSupported() { + return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); + } private void populateFileMetadatas() { fileMetadatas = new ArrayList<>(); @@ -3106,4 +3114,18 @@ public void setFileAccessRequest(boolean fileAccessRequest) { public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } + + public String getWebloaderUrlForDataset(Dataset d) { + String localeCode = session.getLocaleCode(); + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + ApiToken apiToken = authService.getValidApiTokenForUser((AuthenticatedUser) user); + return WebloaderUtil.getWebloaderUrl(d, apiToken, localeCode, + settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl)); + } else { + // Shouldn't normally happen (session timeout? bug?)
+ logger.warning("getWebloaderUrlForDataset called for non-Authenticated user"); + return null; + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java index b242cd2936f..f7b88147c05 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java @@ -938,4 +938,14 @@ public List getWorkflowCommentsByAuthenticatedUser(Authenticat return query.getResultList(); } + public ApiToken getValidApiTokenForUser(AuthenticatedUser user) { + ApiToken apiToken = null; + apiToken = findApiTokenByUser(user); + if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { + logger.fine("Created apiToken for user: " + user.getIdentifier()); + apiToken = generateApiTokenForUser(user); + } + return apiToken; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java new file mode 100644 index 00000000000..266d55eceb3 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java @@ -0,0 +1,36 @@ +package edu.harvard.iq.dataverse.util; + +import java.util.Date; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map.Entry; +import java.util.logging.Logger; + +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpSession; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetPage; +import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; + +public class WebloaderUtil { + + private static final Logger logger = Logger.getLogger(WebloaderUtil.class.getCanonicalName()); + + /** + * Create the URL required to launch https://github.com/gdcc/dvwebloader + */ + public static String getWebloaderUrl(Dataset d, ApiToken apiToken, String localeCode, String baseUrl) { + // Use URLTokenUtil for params currently in common with external tools. + URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); + String appUrl; + appUrl = baseUrl + + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + return tokenUtil.replaceTokensWithValues(appUrl); + } +} diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 1bb862721a5..35753374dbb 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -846,6 +846,7 @@ + diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 40937008ab8..a8d2bde1059 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -158,12 +158,13 @@ widgetVar="fileUploadWidget"> -
-

#{bundle['file.webloader.tip']}

-

#{bundle['file.webloaderAfterCreate.tip']}

- + +
+

#{bundle['file.fromWebloader.tip']}

+

#{bundle['file.fromWebloaderAfterCreate.tip']}

+
-
+

#{bundle['file.fromDropbox.tip']}

-
+

#{bundle['file.fromGlobus.tip']}

#{bundle['file.fromGlobusAfterCreate.tip']}

- +
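Taken together, the refactoring in this patch routes both DatasetPage and EditDatafilesPage through the same pair of helpers. A minimal sketch of the resulting call chain (variable names here are illustrative):

    // getValidApiTokenForUser() returns the user's existing API token, or mints
    // a fresh one when none exists or the current one has expired
    // (see AuthenticationServiceBean above).
    ApiToken apiToken = authService.getValidApiTokenForUser(authenticatedUser);
    // WebloaderUtil appends the shared token template to the configured base URL
    // and substitutes the per-dataset values via URLTokenUtil.
    String launchUrl = WebloaderUtil.getWebloaderUrl(dataset, apiToken, session.getLocaleCode(),
            settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl));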
diff --git a/src/main/webapp/editdatafiles.xhtml b/src/main/webapp/editdatafiles.xhtml index 6c4f07f51da..02acb224827 100644 --- a/src/main/webapp/editdatafiles.xhtml +++ b/src/main/webapp/editdatafiles.xhtml @@ -63,6 +63,7 @@ +
diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index 65489431d65..6ef6dfb29e5 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -883,7 +883,7 @@ div.panel-body.read-terms{max-height:220px; overflow-y:scroll; width:100%; backg #dragdropMsg {padding:20px;font-size:1.3em;color:#808080;text-align:center;} .dropin-btn-status.ui-icon {background: url("https://www.dropbox.com/static/images/widgets/dbx-saver-status.png") no-repeat;} .globus-btn.ui-icon {background: url("https://docs.globus.org/images/home/transfer.png") no-repeat;background-size:contain;display:inline-block;} -.webloader-btn.ui-icon {background: url("resources/images/folders.png") no-repeat;background-size:contain;display:inline-block;} +.webloader-btn.ui-icon {background: url("/resources/images/folders.png") no-repeat;background-size:contain;display:inline-block;} /* VERSIONS */ diff --git a/src/main/webapp/resources/images/folders.png b/src/main/webapp/resources/images/folders.png new file mode 100644 index 0000000000000000000000000000000000000000..a3dc36372803a113a1d6e562731cb96ef42d416a GIT binary patch literal 787 zcmV+u1MK{XP)00001b5ch_0Itp) z=>Px#1ZP1_K>z@;j|==^1poj532;bRa{vGi!vFvd!vV){sAK>D0;owuK~!i%?U=uc z>Od66<3+6PZ4^NyrT74XrSBjJ_JS`Uf{ng_jaA;kPPxI#%ErP!pavETJ4;za@3(qb z!ZjIZVwg#czrvlfA!NRs`OeHqY_r*DVdfA04J{u5s(EVxyWQ^F20}pB0t2fGVY;18 zhXPHrEUVdURw@;WgMq7ncDr4x)k>uj#hPZCCVuDhxvuN?|JX4Mqt$9rAQ4vqSTp&2 zo&pZm>vcRHr$8dk0`U4LlL-af3>t0hh~#MjXHc zO15DTdJnKpqu32-#DS6$1Xuz>f$$Q-IgducAoL#i2=K&Di){l{7|=CA6bOKerqe0f zyaGMkBYArECGP zEEYWle82?O3{?`gybW{*~haj=i%KCoJ?Xe18aCA%z&;>81oVN|PCiaD@* z1|C37~XB9(eKf3xOrXew=~~2z~E(YI+4{ zA^2EmH(;w(Ai!TDY2rZf5#WiRKJo#&CWsmn<22Rl^(eT}@AtiZ8dHz~^uC9~;jfK! zI^FGdDG^T4olYpo!0~u26bcmFK+i=ZCr+mmBRR4KU|X%%>v-i9b6}e0VzF@35c(>l zAOqQKmI7`tNw))q0s-Z6nF2m=3!|rzy@Z~gF8_l)dl1Q;7 Date: Tue, 25 Oct 2022 15:04:52 -0400 Subject: [PATCH 320/608] simplify - just use plus icon --- src/main/webapp/editFilesFragment.xhtml | 2 +- src/main/webapp/resources/css/structure.css | 2 -- src/main/webapp/resources/images/folders.png | Bin 787 -> 0 bytes 3 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 src/main/webapp/resources/images/folders.png diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index a8d2bde1059..09ee7f50024 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -162,7 +162,7 @@

#{bundle['file.fromWebloader.tip']}

#{bundle['file.fromWebloaderAfterCreate.tip']}

- +
diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index 6ef6dfb29e5..0dff334833e 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -883,8 +883,6 @@ div.panel-body.read-terms{max-height:220px; overflow-y:scroll; width:100%; backg #dragdropMsg {padding:20px;font-size:1.3em;color:#808080;text-align:center;} .dropin-btn-status.ui-icon {background: url("https://www.dropbox.com/static/images/widgets/dbx-saver-status.png") no-repeat;} .globus-btn.ui-icon {background: url("https://docs.globus.org/images/home/transfer.png") no-repeat;background-size:contain;display:inline-block;} -.webloader-btn.ui-icon {background: url("/resources/images/folders.png") no-repeat;background-size:contain;display:inline-block;} - /* VERSIONS */ div[id$="versionsTable"] th.col-select-width * {display:none;} diff --git a/src/main/webapp/resources/images/folders.png b/src/main/webapp/resources/images/folders.png deleted file mode 100644 index a3dc36372803a113a1d6e562731cb96ef42d416a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 787 zcmV+u1MK{XP)00001b5ch_0Itp) z=>Px#1ZP1_K>z@;j|==^1poj532;bRa{vGi!vFvd!vV){sAK>D0;owuK~!i%?U=uc z>Od66<3+6PZ4^NyrT74XrSBjJ_JS`Uf{ng_jaA;kPPxI#%ErP!pavETJ4;za@3(qb z!ZjIZVwg#czrvlfA!NRs`OeHqY_r*DVdfA04J{u5s(EVxyWQ^F20}pB0t2fGVY;18 zhXPHrEUVdURw@;WgMq7ncDr4x)k>uj#hPZCCVuDhxvuN?|JX4Mqt$9rAQ4vqSTp&2 zo&pZm>vcRHr$8dk0`U4LlL-af3>t0hh~#MjXHc zO15DTdJnKpqu32-#DS6$1Xuz>f$$Q-IgducAoL#i2=K&Di){l{7|=CA6bOKerqe0f zyaGMkBYArECGP zEEYWle82?O3{?`gybW{*~haj=i%KCoJ?Xe18aCA%z&;>81oVN|PCiaD@* z1|C37~XB9(eKf3xOrXew=~~2z~E(YI+4{ zA^2EmH(;w(Ai!TDY2rZf5#WiRKJo#&CWsmn<22Rl^(eT}@AtiZ8dHz~^uC9~;jfK! zI^FGdDG^T4olYpo!0~u26bcmFK+i=ZCr+mmBRR4KU|X%%>v-i9b6}e0VzF@35c(>l zAOqQKmI7`tNw))q0s-Z6nF2m=3!|rzy@Z~gF8_l)dl1Q;7 Date: Tue, 25 Oct 2022 16:14:14 -0400 Subject: [PATCH 321/608] rename solr_srpt to geolocation and solr_bboxtype to boundingBox #8239 --- conf/solr/8.11.1/schema.xml | 12 +++++++----- doc/sphinx-guides/source/api/search.rst | 2 +- .../iq/dataverse/search/IndexServiceBean.java | 4 ++-- .../harvard/iq/dataverse/search/SearchFields.java | 6 +++--- .../iq/dataverse/search/SearchServiceBean.java | 2 +- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index 10f1d8f1f4f..655cf1bc3cc 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -228,6 +228,11 @@ + + + + + - - + @@ -1107,7 +1109,7 @@ --> - + diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst index c4e62e05bb7..c2311ead089 100755 --- a/doc/sphinx-guides/source/api/search.rst +++ b/doc/sphinx-guides/source/api/search.rst @@ -742,7 +742,7 @@ For example, a query of the form .. code-block:: none - q=*.*&fq={!bbox sfield=solr_srpt}=&pt=10,10&d=5 + q=*.*&fq={!bbox sfield=geolocation}=&pt=10,10&d=5 would find datasets with information near the point latitude=10, longitude=10. 
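For readers trying the renamed field directly, the same filter can be issued programmatically. A hypothetical SolrJ sketch, equivalent to the guide's example above (the canonical match-all query is q=*:*; the SolrClient instance here is an assumption, not part of the patch):

    // Finds datasets whose indexed "geolocation" bounding box lies within 5 km
    // of the point latitude=10, longitude=10.
    SolrQuery query = new SolrQuery("*:*");
    query.addFilterQuery("{!geofilt sfield=geolocation pt=10,10 d=5}");
    QueryResponse response = solrClient.query(query);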
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index f5a5570c831..4661e9c1cd5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1003,13 +1003,13 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Wed, 26 Oct 2022 15:41:41 -0400 Subject: [PATCH 322/608] add error checking for geo_point and geo_radius #8239 --- .../edu/harvard/iq/dataverse/api/Search.java | 14 ++++-- .../iq/dataverse/search/SearchUtil.java | 46 ++++++++++++++++++- .../harvard/iq/dataverse/api/SearchIT.java | 38 +++++++++++++-- .../iq/dataverse/search/SearchUtilTest.java | 38 +++++++++++++++ 4 files changed, 127 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Search.java b/src/main/java/edu/harvard/iq/dataverse/api/Search.java index 737fc7d1e20..cef509b1ec5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Search.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Search.java @@ -126,6 +126,14 @@ public Response search( geoPoint = getGeoPoint(geoPointRequested); geoRadius = getGeoRadius(geoRadiusRequested); + if (geoPoint != null && geoRadius == null) { + return error(Response.Status.BAD_REQUEST, "If you supply geo_point you must also supply geo_radius."); + } + + if (geoRadius != null && geoPoint == null) { + return error(Response.Status.BAD_REQUEST, "If you supply geo_radius you must also supply geo_point."); + } + } catch (Exception ex) { return error(Response.Status.BAD_REQUEST, ex.getLocalizedMessage()); } @@ -350,13 +358,11 @@ private Dataverse getSubtree(String alias) throws Exception { } private String getGeoPoint(String geoPointRequested) { - // TODO add error checking - return geoPointRequested; + return SearchUtil.getGeoPoint(geoPointRequested); } private String getGeoRadius(String geoRadiusRequested) { - // TODO add error checking - return geoRadiusRequested; + return SearchUtil.getGeoRadius(geoRadiusRequested); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java index c226d77f885..8a1045a842c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java @@ -181,5 +181,49 @@ public static String constructQuery(List queryStrings, boolean isAnd, bo return queryBuilder.toString().trim(); } - + + /** + * @return Null if supplied point is null or whitespace. + * @throws IllegalArgumentException If the lat/long is not separated by a + * comma. + * @throws NumberFormatException If the lat/long values are not numbers. + */ + public static String getGeoPoint(String userSuppliedGeoPoint) throws IllegalArgumentException, NumberFormatException { + if (userSuppliedGeoPoint == null || userSuppliedGeoPoint.isBlank()) { + return null; + } + String[] parts = userSuppliedGeoPoint.split(","); + // We'll supply our own errors but Solr gives a decent one: + // "Point must be in 'lat, lon' or 'x y' format: 42.3;-71.1" + if (parts.length != 2) { + String msg = "Must contain a single comma to separate latitude and longitude."; + throw new IllegalArgumentException(msg); + } + float latitude = Float.parseFloat(parts[0]); + float longitude = Float.parseFloat(parts[1]); + return latitude + "," + longitude; + } + + /** + * @return Null if supplied radius is null or whitespace. 
+ * @throws NumberFormatException If the radius is not a positive number. + */ + public static String getGeoRadius(String userSuppliedGeoRadius) throws NumberFormatException { + if (userSuppliedGeoRadius == null || userSuppliedGeoRadius.isBlank()) { + return null; + } + float radius = 0; + try { + radius = Float.parseFloat(userSuppliedGeoRadius); + } catch (NumberFormatException ex) { + String msg = "Non-number radius supplied."; + throw new NumberFormatException(msg); + } + if (radius <= 0) { + String msg = "The supplied radius must be greater than zero."; + throw new NumberFormatException(msg); + } + return userSuppliedGeoRadius; + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index 0f2c77de717..fc3b911c0a5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -17,6 +17,7 @@ import java.io.UnsupportedEncodingException; import java.util.Base64; import javax.json.JsonArray; +import static javax.ws.rs.core.Response.Status.BAD_REQUEST; import static javax.ws.rs.core.Response.Status.OK; import static javax.ws.rs.core.Response.Status.FORBIDDEN; import org.hamcrest.CoreMatchers; @@ -1222,18 +1223,47 @@ public void testGeospatialSearch() { Response search1 = UtilIT.search("id:dataset_" + datasetId + "_draft", apiToken, "&show_entity_ids=true&geo_point=41.9580775,-70.6621063&geo_radius=50"); search1.prettyPrint(); search1.then().assertThat() + .statusCode(OK.getStatusCode()) .body("data.total_count", CoreMatchers.is(1)) .body("data.count_in_response", CoreMatchers.is(1)) - .body("data.items[0].entity_id", CoreMatchers.is(datasetId)) - .statusCode(OK.getStatusCode()); + .body("data.items[0].entity_id", CoreMatchers.is(datasetId)); // Plymouth rock (41.9580775,-70.6621063) is not within 1 km of Cambridge. Miss. 
+ Response search2 = UtilIT.search("id:dataset_" + datasetId + "_draft", apiToken, "&geo_point=41.9580775,-70.6621063&geo_radius=1"); search2.prettyPrint(); search2.then().assertThat() + .statusCode(OK.getStatusCode()) .body("data.total_count", CoreMatchers.is(0)) - .body("data.count_in_response", CoreMatchers.is(0)) - .statusCode(OK.getStatusCode()); + .body("data.count_in_response", CoreMatchers.is(0)); + + } + + @Test + public void testGeospatialSearchInvalid() { + + Response noRadius = UtilIT.search("*", null, "&geo_point=40,60"); + noRadius.prettyPrint(); + noRadius.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", CoreMatchers.equalTo("If you supply geo_point you must also supply geo_radius.")); + + Response noPoint = UtilIT.search("*", null, "&geo_radius=5"); + noPoint.prettyPrint(); + noPoint.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", CoreMatchers.equalTo("If you supply geo_radius you must also supply geo_point.")); + + Response junkPoint = UtilIT.search("*", null, "&geo_point=junk&geo_radius=5"); + junkPoint.prettyPrint(); + junkPoint.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", CoreMatchers.equalTo("Must contain a single comma to separate latitude and longitude.")); + + Response junkRadius = UtilIT.search("*", null, "&geo_point=40,60&geo_radius=junk"); + junkRadius.prettyPrint(); + junkRadius.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", CoreMatchers.equalTo("Non-number radius supplied.")); } diff --git a/src/test/java/edu/harvard/iq/dataverse/search/SearchUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/search/SearchUtilTest.java index 525e03f8302..33f50c9a4c0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/search/SearchUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/search/SearchUtilTest.java @@ -91,4 +91,42 @@ public void testdetermineFinalQuery() { assertEquals("*", SearchUtil.determineFinalQuery("")); assertEquals("foo", SearchUtil.determineFinalQuery("foo")); } + + @Test + public void testGetGeoPoint() { + // valid + assertEquals("42.3,-71.1", SearchUtil.getGeoPoint("42.3,-71.1")); + // user doesn't want geospatial search + assertEquals(null, SearchUtil.getGeoPoint(null)); + // invalid + assertThrows(IllegalArgumentException.class, () -> { + SearchUtil.getGeoPoint("42.3;-71.1"); + }, "Must have a comma."); + assertThrows(IllegalArgumentException.class, () -> { + SearchUtil.getGeoPoint("-71.187346,42.33661,-71.043056,42.409599"); + }, "Must have only one comma."); + assertThrows(IllegalArgumentException.class, () -> { + SearchUtil.getGeoPoint("junk"); + }, "Must have a comma."); + assertThrows(NumberFormatException.class, () -> { + SearchUtil.getGeoPoint("somejunk,morejunk"); + }, "Must be numbers."); + } + + @Test + public void testGetGeoRadius() { + // valid + assertEquals("5", SearchUtil.getGeoRadius("5")); + assertEquals("1.5", SearchUtil.getGeoRadius("1.5")); + // user doesn't want geospatial search + assertEquals(null, SearchUtil.getGeoRadius(null)); + assertEquals(null, SearchUtil.getGeoRadius("")); + // invalid + assertThrows(NumberFormatException.class, () -> { + SearchUtil.getGeoRadius("nonNumber"); + }, "Must be a number."); + assertThrows(NumberFormatException.class, () -> { + SearchUtil.getGeoRadius("-1"); + }, "Must be greater than zero."); + } } From 6e7499e7be3586b1129d9598716e4c2e6ba4b27d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 26 Oct 2022 17:34:55 -0400 Subject: [PATCH 323/608]
update docs and release note (supported via API) #8239 --- doc/release-notes/8239-geospatial-indexing.md | 6 +++++- doc/sphinx-guides/source/api/search.rst | 18 ++---------------- .../source/user/find-use-data.rst | 7 +++++++ 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/doc/release-notes/8239-geospatial-indexing.md b/doc/release-notes/8239-geospatial-indexing.md index 3e6ba0e7a07..165cb9031ba 100644 --- a/doc/release-notes/8239-geospatial-indexing.md +++ b/doc/release-notes/8239-geospatial-indexing.md @@ -1 +1,5 @@ -Support for indexing the Geographic Bounding Box fields from the Geospatial metadata block has been added. This allows trusted applications with access to solr to perform geospatial queries to find datasets, e.g. those near a given point. This is also a step towards enabling geospatial queries via the Dataverse API and UI. +Support for indexing the "Geographic Bounding Box" fields ("West Longitude", "East Longitude", "North Latitude", and "South Latitude") from the Geospatial metadata block has been added. + +Geospatial search is supported but only via API using two new parameters: `geo_point` and `geo_radius`. + +A Solr schema update is required. diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst index c2311ead089..b941064f173 100755 --- a/doc/sphinx-guides/source/api/search.rst +++ b/doc/sphinx-guides/source/api/search.rst @@ -35,8 +35,8 @@ show_relevance boolean Whether or not to show details of which fields were ma show_facets boolean Whether or not to show facets that can be operated on by the "fq" parameter. False by default. See :ref:`advanced search example `. fq string A filter query on the search term. Multiple "fq" parameters can be used. See :ref:`advanced search example `. show_entity_ids boolean Whether or not to show the database IDs of the search results (for developer use). -geo_point string Latitude and longitude in the form ``geo_point=42.3,-71.1``. -geo_radius string Radial distance in kilometers such as ``geo_radius=5``. +geo_point string Latitude and longitude in the form ``geo_point=42.3,-71.1``. You must supply ``geo_radius`` as well. See also :ref:`geospatial-search`. +geo_radius string Radial distance in kilometers from ``geo_point`` (which must be supplied as well) such as ``geo_radius=1.5``. metadata_fields string Includes the requested fields for each dataset in the response. Multiple "metadata_fields" parameters can be used to include several fields. The value must be in the form "{metadata_block_name}:{field_name}" to include a specific field from a metadata block (see :ref:`example `) or "{metadata_field_set_name}:\*" to include all the fields for a metadata block (see :ref:`example `). "{field_name}" cannot be a subfield of a compound field. If "{field_name}" is a compound field, all subfields are included. =============== ======= =========== @@ -732,17 +732,3 @@ Output from iteration example CORS - - -Geospatial Indexing -------------------- - -Dataverse indexes the Geospatial Bounding Box field from the Geospatial metadatablock as a solr.BBoxField enabling `Spatial Search `_. This capability is not yet exposed through the Dataverse API or UI but can be accessed by trusted applications with direct solr access. -For example, a query of the form - -.. code-block:: none - - q=*.*&fq={!bbox sfield=geolocation}=&pt=10,10&d=5 - - -would find datasets with information near the point latitude=10, longitude=10. 
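For illustration, the two new parameters documented above are used together, just as the integration tests in this series do; a minimal sketch using the UtilIT helper (the query, coordinates, and apiToken are examples):

    // geo_point is "latitude,longitude" and geo_radius is a distance in
    // kilometers; supplying one without the other is rejected with a 400.
    Response nearby = UtilIT.search("*", apiToken, "&geo_point=42.3,-71.1&geo_radius=1.5");
    nearby.prettyPrint();
    nearby.then().assertThat().statusCode(OK.getStatusCode());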
diff --git a/doc/sphinx-guides/source/user/find-use-data.rst b/doc/sphinx-guides/source/user/find-use-data.rst index 42e1a2b23d4..2e82a1482b4 100755 --- a/doc/sphinx-guides/source/user/find-use-data.rst +++ b/doc/sphinx-guides/source/user/find-use-data.rst @@ -39,6 +39,13 @@ enter search terms for Dataverse collections, dataset metadata (citation and dom metadata. If you are searching for tabular data files you can also search at the variable level for name and label. To find out more about what each field searches, hover over the field name for a detailed description of the field. +.. _geospatial-search: + +Geospatial Search +----------------- + +Geospatial search is available from the :doc:`/api/search` (look for "geo" parameters). The metadata fields that are geospatially indexed are "West Longitude", "East Longitude", "North Latitude", and "South Latitude" from the "Geographic Bounding Box" field in the "Geospatial Metadata" block. + Browsing a Dataverse Installation --------------------------------- From ebb138042f8b9134482a0a2119b9008f76ab80a1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 09:57:04 -0400 Subject: [PATCH 324/608] added org Phrases for DANS vs creating a second PR --- .../harvard/iq/dataverse/DatasetVersion.java | 2 +- .../iq/dataverse/util/PersonOrOrgUtil.java | 82 +++++++++++++++---- .../iq/dataverse/util/json/JsonUtil.java | 6 ++ .../dataverse/util/PersonOrOrgUtilTest.java | 18 +++- 4 files changed, 90 insertions(+), 18 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 1204d1dd4f1..c374204f73f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1809,7 +1809,7 @@ public String getJsonLd() { if (authorAffiliation != null) { affiliation = datasetAuthor.getAffiliation().getDisplayValue(); } - JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, (identifierAsUrl==null)); + JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, false, (identifierAsUrl==null)); String givenName= entity.containsKey("givenName") ? entity.getString("givenName"):null; String familyName= entity.containsKey("familyName") ? entity.getString("familyName"):null; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index b8089422fcd..497cc689983 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -1,11 +1,18 @@ package edu.harvard.iq.dataverse.util; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Logger; + +import javax.json.JsonArray; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; +import javax.json.JsonString; import edu.harvard.iq.dataverse.export.openaire.Cleanup; import edu.harvard.iq.dataverse.export.openaire.FirstNames; import edu.harvard.iq.dataverse.export.openaire.Organizations; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; /** @@ -18,36 +25,63 @@ * whether a name is that of a Person or Organization and, if the * former, to pull out the given and family names. * - * Adds a parameter that can improve accuracy, e.g. for curated - * repositories, allowing the code to assume that all Person entries are - * in , order. 
+ * Adds parameters that can improve accuracy: + * + * * e.g. for curated repositories, allowing the code to assume that all + * Person entries are in , order. * - * Possible ToDo - one could also allow local configuration of specific - * words that will automatically categorize one-off cases that the - * algorithm would otherwise mis-categorize. For example, the code - * appears to not recognize names ending in "Project" as an - * Organization. + * * allow local configuration of specific words/phrases that will + * automatically categorize one-off cases that the algorithm would + * otherwise mis-categorize. For example, the code appears to not + * recognize names ending in "Project" as an Organization. * */ public class PersonOrOrgUtil { + private static final Logger logger = Logger.getLogger(PersonOrOrgUtil.class.getCanonicalName()); + static boolean assumeCommaInPersonName = false; + static List orgPhrases; static { setAssumeCommaInPersonName(Boolean.parseBoolean(System.getProperty("dataverse.personOrOrg.assumeCommaInPersonName", "false"))); + setOrgPhraseArray(System.getProperty("dataverse.personOrOrg.orgPhraseArray", null)); } - public static JsonObject getPersonOrOrganization(String name, boolean organizationIfTied) { + /** + * This method tries to determine if a name belongs to a person or an + * organization and, if it is a person, what the given and family names are. The + * core algorithm is adapted from a Datacite algorithm, see + * https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 + * + * @param name + * - the name to test + * @param organizationIfTied + * - if a given name isn't found, should the name be assumed to be + * from an organization. This could be a generic true/false or + * information from some non-name aspect of the entity, e.g. which + * field is in use, or whether a .edu email exists, etc. + * @param isPerson + * - if this is known to be a person due to other info (i.e. they + * have an ORCID). In this case the algorithm is just looking for + * given/family names. + * @return + */ + public static JsonObject getPersonOrOrganization(String name, boolean organizationIfTied, boolean isPerson) { name = Cleanup.normalize(name); String givenName = null; String familyName = null; - // adapted from a Datacite algorithm, - // https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 - boolean isOrganization = Organizations.getInstance().isOrganization(name); - // ToDo - could add a check of stop words to handle problem cases, i.e. 
if name - // contains something in that list, it is an org + + boolean isOrganization = !isPerson && Organizations.getInstance().isOrganization(name); + if (!isOrganization) { + for (String phrase : orgPhrases) { + if (name.contains(phrase)) { + isOrganization = true; + } + } + } if (name.contains(",")) { givenName = FirstNames.getInstance().getFirstName(name); // contributorName=, @@ -63,7 +97,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } } else if (isOrganization || organizationIfTied) { isOrganization = true; - givenName=null; + givenName = null; } } else { @@ -94,6 +128,24 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } + // Public for testing + public static void setOrgPhraseArray(String phraseArray) { + orgPhrases = new ArrayList(); + if (!StringUtil.isEmpty(phraseArray)) { + try { + JsonArray phrases = JsonUtil.getJsonArray(phraseArray); + phrases.forEach(val -> { + JsonString strVal = (JsonString) val; + orgPhrases.add(strVal.getString()); + }); + } catch (Exception e) { + logger.warning("Could not parse Org phrase list"); + } + } + + } + + // Public for testing public static void setAssumeCommaInPersonName(boolean assume) { assumeCommaInPersonName = assume; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index f4a3c635f8b..21ff0e03773 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -63,4 +63,10 @@ public static javax.json.JsonObject getJsonObject(String serializedJson) { return Json.createReader(rdr).readObject(); } } + + public static javax.json.JsonArray getJsonArray(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + return Json.createReader(rdr).readArray(); + } + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java index dbda622b536..b22f18ca787 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java @@ -27,6 +27,16 @@ public void testOrganizationCOMPLEXName() { verifyIsOrganization("The Ford Foundation"); verifyIsOrganization("United Nations Economic and Social Commission for Asia and the Pacific (UNESCAP)"); verifyIsOrganization("Michael J. 
Fox Foundation for Parkinson's Research"); + // The next example is one known to be asserted to be a Person without an entry + // in the OrgWordArray + // So we test with it in the array and then when the array is empty to verify + // the array works, resetting the array works, and the problem still exists in + // the underlying algorithm + PersonOrOrgUtil.setOrgPhraseArray("[\"Portable\"]"); + verifyIsOrganization("Portable Antiquities of the Netherlands"); + PersonOrOrgUtil.setOrgPhraseArray(null); + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization("Portable Antiquities of the Netherlands", false, false); + assertTrue(obj.getBoolean("isPerson")); } @Test @@ -79,7 +89,7 @@ public void testName() { } private void verifyIsOrganization(String fullName) { - JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false); + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false, false); System.out.println(JsonUtil.prettyPrint(obj)); assertEquals(obj.getString("fullName"),fullName); assertFalse(obj.getBoolean("isPerson")); @@ -87,7 +97,11 @@ private void verifyIsOrganization(String fullName) { } private void verifyIsPerson(String fullName, String givenName, String familyName) { - JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false); + verifyIsPerson(fullName, givenName, familyName, false); + } + + private void verifyIsPerson(String fullName, String givenName, String familyName, boolean isPerson) { + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false, isPerson); System.out.println(JsonUtil.prettyPrint(obj)); assertEquals(obj.getString("fullName"),fullName); assertTrue(obj.getBoolean("isPerson")); From 4dcd8ed8e68807fd0381170f8260b381383b3171 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 11:14:34 -0400 Subject: [PATCH 325/608] fix affiliation value (no parens) --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index c374204f73f..b7eca85e95b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1807,7 +1807,7 @@ public String getJsonLd() { DatasetField authorAffiliation = datasetAuthor.getAffiliation(); String affiliation = null; if (authorAffiliation != null) { - affiliation = datasetAuthor.getAffiliation().getDisplayValue(); + affiliation = datasetAuthor.getAffiliation().getValue(); } JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, false, (identifierAsUrl==null)); String givenName= entity.containsKey("givenName") ? 
entity.getString("givenName"):null; From 4e6f065f272d0f74cbba19e19c04e4344cccc8fb Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 11:17:16 -0400 Subject: [PATCH 326/608] use brandname for catalog --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..15d97dde55f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1957,13 +1957,14 @@ public String getJsonLd() { job.add("license",DatasetUtil.getLicenseURI(this)); } + String installationBrandName = BrandingUtil.getInstallationBrandName(); + job.add("includedInDataCatalog", Json.createObjectBuilder() .add("@type", "DataCatalog") - .add("name", BrandingUtil.getRootDataverseCollectionName()) + .add("name", installationBrandName) .add("url", SystemConfig.getDataverseSiteUrlStatic()) ); - - String installationBrandName = BrandingUtil.getInstallationBrandName(); + /** * Both "publisher" and "provider" are included but they have the same * values. Some services seem to prefer one over the other. From 0184b3d9afce7d83db4c6b0bb6e5956f0daa8b4b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 12:19:23 -0400 Subject: [PATCH 327/608] logic fix --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index b7eca85e95b..061712f6864 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1809,7 +1809,7 @@ public String getJsonLd() { if (authorAffiliation != null) { affiliation = datasetAuthor.getAffiliation().getValue(); } - JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, false, (identifierAsUrl==null)); + JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, false, (identifierAsUrl!=null)); String givenName= entity.containsKey("givenName") ? entity.getString("givenName"):null; String familyName= entity.containsKey("familyName") ? 
entity.getString("familyName"):null; From 545a295764e71f63dc0b3d6480805801f1ef51f6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 12:40:05 -0400 Subject: [PATCH 328/608] comma check shouldn't override isPerson --- .../java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index 497cc689983..bacbb705721 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -101,7 +101,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } } else { - if (assumeCommaInPersonName) { + if (assumeCommaInPersonName && !isPerson) { isOrganization = true; } else { givenName = FirstNames.getInstance().getFirstName(name); From ab2326c38aef3f76d1ee824606fcad8c73bc2944 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 13:18:29 -0400 Subject: [PATCH 329/608] always set givenName null for Org --- src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index bacbb705721..3a8088aac77 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -115,6 +115,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati // default if (isOrganization || organizationIfTied) { isOrganization = true; + givenName=null; } } } From 0d541064d17d4b8d64d61db617e0d541613ec711 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 14:24:46 -0400 Subject: [PATCH 330/608] optimize - break out of loop when done --- src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index 3a8088aac77..da33fc9597e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -79,6 +79,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati for (String phrase : orgPhrases) { if (name.contains(phrase)) { isOrganization = true; + break; } } } From 1d935fe580284384328f8374c9f223f71916c4c6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 14:55:40 -0400 Subject: [PATCH 331/608] documentation of new options --- doc/sphinx-guides/source/admin/metadataexport.rst | 10 ++++++++++ doc/sphinx-guides/source/installation/config.rst | 11 ++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index 78b8c8ce223..200c3a3e342 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -57,3 +57,13 @@ Downloading Metadata via API ---------------------------- The :doc:`/api/native-api` section of the API Guide explains how end users can download the metadata formats above via API. 
+ +Exporter Configuration +---------------------- + +Two exporters - Schema.org JSONLD and OpenAIRE - use an algorithm to determine whether an author or contact name belongs to a person or organization. While the algorithm works well, there are cases in which it makes mistakes, usually inferring that an organization is a person. + +The Dataverse software implements two jvm-options that can be used to tune the algorithm: + +- :ref:`dataverse.personOrOrg.assumeCommaInPersonName` - boolean, default false. If true, Dataverse will assume any name without a comma must be an organization. This may be most useful for curated Dataverse instances that enforce the "family name, given name" convention. +- :ref:`dataverse.personOrOrg.orgPhraseArray` - a JsonArray of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization. diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst wait diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 3e01f372c9b..5d4d29271f9 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1662,9 +1662,18 @@ dataverse.personOrOrg.assumeCommaInPersonName Please note that this setting is experimental. -The Schema.org metadata export and the Schema.org metadata included in DatasetPages tries to infer whether each entry in the Author field is a Person or Organization. If you are sure that +The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. If you are sure that users are following the guidance to add people in the recommended family name, given name order, with a comma, you can set this true to always assume entries without a comma are for Organizations. The default is false. +dataverse.personOrOrg.orgPhraseArray +++++++++++++++++++++++++++++++++++++ + +Please note that this setting is experimental. + +The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. +If you have examples where an organization name is being inferred to belong to a person, you can use this setting to force it to be recognized as an organization. +The value is expected to be a JsonArray of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization. + .. _:ApplicationServerSettings: Application Server Settings From a5ae4d782c63ba71a72f0da1748b7f62e1904434 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 14:59:59 -0400 Subject: [PATCH 332/608] add labels --- doc/sphinx-guides/source/installation/config.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 5d4d29271f9..96397b707ff 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1657,6 +1657,8 @@ This setting is useful in cases such as running your Dataverse installation behi "HTTP_VIA", "REMOTE_ADDR" +..
_dataverse.personOrOrg.assumeCommaInPersonName: + dataverse.personOrOrg.assumeCommaInPersonName +++++++++++++++++++++++++++++++++++++++++++++ @@ -1665,6 +1667,8 @@ Please note that this setting is experimental. The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. If you are sure that users are following the guidance to add people in the recommended family name, given name order, with a comma, you can set this true to always assume entries without a comma are for Organizations. The default is false. +.. _dataverse.personOrOrg.orgPhraseArray: + dataverse.personOrOrg.orgPhraseArray ++++++++++++++++++++++++++++++++++++ From 7274d1f61bdc9f2d4ae9c6e935b802e258fb9002 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 28 Oct 2022 15:34:10 -0400 Subject: [PATCH 333/608] Add quotes around CVoc term URIs on advanced search page --- .../iq/dataverse/search/AdvancedSearchPage.java | 3 ++- .../edu/harvard/iq/dataverse/search/SearchUtil.java | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java b/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java index a7a89def449..ef37569ac54 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java @@ -111,7 +111,8 @@ private String constructDatasetQuery() { List queryStrings = new ArrayList<>(); for (DatasetFieldType dsfType : metadataFieldList) { if (dsfType.getSearchValue() != null && !dsfType.getSearchValue().equals("")) { - queryStrings.add(constructQuery(dsfType.getSolrField().getNameSearchable(), dsfType.getSearchValue())); + //CVoc fields return term URIs - add quotes around them to avoid solr breaking them into individual search words + queryStrings.add(constructQuery(dsfType.getSolrField().getNameSearchable(), dsfType.getSearchValue(), getCVocConf().containsKey(dsfType.getId()))); } else if (dsfType.getListValues() != null && !dsfType.getListValues().isEmpty()) { List listQueryStrings = new ArrayList<>(); for (String value : dsfType.getListValues()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java index c226d77f885..dedb5457173 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java @@ -117,6 +117,10 @@ public static String determineFinalQuery(String userSuppliedQuery) { } public static String constructQuery(String solrField, String userSuppliedQuery) { + return constructQuery(solrField, userSuppliedQuery, false); + } + + public static String constructQuery(String solrField, String userSuppliedQuery, boolean addQuotes) { StringBuilder queryBuilder = new StringBuilder(); String delimiter = "[\"]+"; @@ -134,7 +138,12 @@ public static String constructQuery(String solrField, String userSuppliedQuery) } else { StringTokenizer st = new StringTokenizer(userSuppliedQuery); while (st.hasMoreElements()) { - queryStrings.add(solrField + ":" + st.nextElement()); + String nextElement = (String) st.nextElement(); + //Entries such as URIs will get tokenized into individual words by solr unless they are in quotes + if(addQuotes) { + nextElement = "\"" + nextElement + "\""; + } + queryStrings.add(solrField + ":" + nextElement); } } } 
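For illustration, a minimal sketch of what the new addQuotes flag does, assuming a hypothetical CVoc field named keywordTermURI and a made-up term URI (neither is taken from a real installation; the expected output follows from the tokenizing logic in the patch above):

import edu.harvard.iq.dataverse.search.SearchUtil;

public class CVocQuoteDemo {
    public static void main(String[] args) {
        // Hypothetical CVoc field and term URI, for illustration only.
        String solrField = "keywordTermURI";
        String termUri = "https://vocab.example.org/term/1234";
        // Without quotes, Solr would tokenize the URI into individual words;
        // with addQuotes=true the whole URI is matched as a single phrase.
        System.out.println(SearchUtil.constructQuery(solrField, termUri, true));
        // expected output, roughly: keywordTermURI:"https://vocab.example.org/term/1234"
    }
}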
From c80a06f38b679641d80a05b5215106006da52667 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 28 Oct 2022 18:07:07 -0400 Subject: [PATCH 334/608] fix for cvv and editMetadata replace=true, and test --- .../source/_static/api/dataset-add-subject-metadata.json | 2 +- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 1 + .../java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 8 +++++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/_static/api/dataset-add-subject-metadata.json b/doc/sphinx-guides/source/_static/api/dataset-add-subject-metadata.json index ea0922dadc8..c81c5b32aab 100644 --- a/doc/sphinx-guides/source/_static/api/dataset-add-subject-metadata.json +++ b/doc/sphinx-guides/source/_static/api/dataset-add-subject-metadata.json @@ -2,7 +2,7 @@ "typeName": "subject", "value": ["Astronomy and Astrophysics", "Agricultural Sciences", -"Arts and Humanities", "Physics"] +"Arts and Humanities", "Physics", "Mathematical Sciences"] } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index aff543e643c..2ae4544ae68 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -986,6 +986,7 @@ private Response processDatasetUpdate(String jsonBody, String id, DataverseReque dsf.setSingleValue(""); dsf.setSingleControlledVocabularyValue(null); } + cvvDisplay=""; } if (updateField.getDatasetFieldType().isControlledVocabulary()) { if (dsf.getDatasetFieldType().isAllowMultiples()) { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 529af5f746c..326b3963217 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -67,6 +67,7 @@ import javax.xml.stream.XMLStreamReader; import static org.junit.Assert.assertEquals; import org.hamcrest.CoreMatchers; +import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.startsWith; import static org.hamcrest.CoreMatchers.nullValue; @@ -76,7 +77,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.junit.matchers.JUnitMatchers.containsString; + public class DatasetsIT { @@ -272,9 +273,10 @@ public void testAddUpdateDatasetViaNativeAPI() { String pathToJsonFileSingle = "doc/sphinx-guides/source/_static/api/dataset-simple-update-metadata.json"; Response addSubjectSingleViaNative = UtilIT.updateFieldLevelDatasetMetadataViaNative(datasetPersistentId, pathToJsonFileSingle, apiToken); - addSubjectSingleViaNative.prettyPrint(); + String responseString = addSubjectSingleViaNative.prettyPrint(); addSubjectSingleViaNative.then().assertThat() - .statusCode(OK.getStatusCode()); + .statusCode(OK.getStatusCode()).body(containsString("Mathematical Sciences")).body(containsString("Social Sciences")); + //Trying to blank out required field should fail... 
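The replace semantics exercised by the test above can also be tried over HTTP. Below is a rough sketch using java.net.http against the documented editMetadata endpoint; the server URL, persistent identifier, API token, and JSON file name are all placeholders:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Path;

public class EditMetadataDemo {
    public static void main(String[] args) throws Exception {
        String serverUrl = "http://localhost:8080";                // placeholder
        String pid = "doi:10.5072/FK2/J8SJZB";                     // placeholder PID
        String apiToken = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";  // placeholder token
        // PUT the field-level update; with replace=true the submitted values
        // (including controlled vocabulary values) overwrite the existing ones
        // instead of being appended to them.
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create(serverUrl + "/api/datasets/:persistentId/editMetadata"
                        + "?persistentId=" + pid + "&replace=true"))
                .header("X-Dataverse-key", apiToken)
                .PUT(HttpRequest.BodyPublishers.ofFile(
                        Path.of("dataset-add-subject-metadata.json")))
                .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode() + " " + response.body());
    }
}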
From 5993be8a5adecd41bf138e35ceda212cac522f92 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 28 Oct 2022 18:15:35 -0400 Subject: [PATCH 335/608] check math exists before update --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 326b3963217..18afb88fb3b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -268,7 +268,7 @@ public void testAddUpdateDatasetViaNativeAPI() { addSubjectViaNative = UtilIT.addDatasetMetadataViaNative(datasetPersistentId, pathToJsonFile, apiToken); addSubjectViaNative.prettyPrint(); addSubjectViaNative.then().assertThat() - .statusCode(OK.getStatusCode()); + .statusCode(OK.getStatusCode()).body(containsString("Mathematical Sciences")); String pathToJsonFileSingle = "doc/sphinx-guides/source/_static/api/dataset-simple-update-metadata.json"; From aa321f3769e53c7b1c65e2f82c7f2bd26bb78b61 Mon Sep 17 00:00:00 2001 From: chenganj Date: Wed, 2 Nov 2022 12:38:23 -0400 Subject: [PATCH 336/608] handled class cast exception --- .../iq/dataverse/dataset/DatasetUtil.java | 42 ++++++------------- .../migration/V4.13.0.1__3575-usernames.sql | 2 +- ...16.0.1__5303-addColumn-to-settingTable.sql | 6 ++- src/main/webapp/dataset-license-terms.xhtml | 6 +-- .../webapp/datasetLicenseInfoFragment.xhtml | 4 +- 5 files changed, 24 insertions(+), 36 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 31e45aebf18..75cde7b4bd9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -547,7 +547,7 @@ public static License getLicense(DatasetVersion dsv) { public static String getLicenseName(DatasetVersion dsv) { License license = DatasetUtil.getLicense(dsv); - return license != null ? license.getName() + return license != null ? getLocalizedLicenseDetails(license.getName(),".name") : BundleUtil.getStringFromBundle("license.custom"); } @@ -573,41 +573,25 @@ public static String getLicenseIcon(DatasetVersion dsv) { public static String getLicenseDescription(DatasetVersion dsv) { License license = DatasetUtil.getLicense(dsv); - - if (license != null) { - return getLocalizedLicense(license.getName(),"description") ; - } else { - return BundleUtil.getStringFromBundle("license.custom.description"); - } + return license != null ? getLocalizedLicenseDetails(license.getName(),".description") : BundleUtil.getStringFromBundle("license.custom.description"); } - public static String getLocalizedLicense(String licenseName,String keyPart) { - String key = "license." + licenseName.toLowerCase().replace(" ", "_") + "." + keyPart; + public static String getLocalizedLicenseDetails(String licenseName,String keyPart) { + String key = "license." 
+ licenseName.toLowerCase().replace(" ", "_") + keyPart; - String second_key = ""; - if (keyPart == "description") - { - second_key = "license.custom.description"; + String localizedLicenseValue = "" ; + try { + localizedLicenseValue = BundleUtil.getStringFromPropertyFile(key, "License"); } - else - { - second_key = "license.custom"; + catch (Exception e) { + localizedLicenseValue = licenseName.toLowerCase(); } - if (key != null) { - try { - String propertyValue = BundleUtil.getStringFromPropertyFile(key, "License"); - if (propertyValue == null) { - return BundleUtil.getStringFromBundle(second_key); - } else { - return propertyValue; - } - } catch (MissingResourceException mre) { - return BundleUtil.getStringFromBundle(second_key); - } - } else { - return BundleUtil.getStringFromBundle(second_key); + if (localizedLicenseValue == null) { + localizedLicenseValue = licenseName.toLowerCase() ; } + return localizedLicenseValue; + } public static String getLocaleExternalStatus(String status) { diff --git a/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql b/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql index 9e35623c455..0b1804bdfc4 100644 --- a/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql +++ b/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql @@ -1 +1 @@ -CREATE UNIQUE INDEX IF NOT EXISTS index_authenticateduser_lower_useridentifier ON authenticateduser (lower(useridentifier)); +CREATE UNIQUE INDEX index_authenticateduser_lower_useridentifier ON authenticateduser (lower(useridentifier)); diff --git a/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql b/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql index db08efdab7e..8309dacf486 100644 --- a/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql +++ b/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql @@ -4,6 +4,10 @@ ALTER TABLE setting ADD COLUMN IF NOT EXISTS ID SERIAL PRIMARY KEY; ALTER TABLE setting ADD COLUMN IF NOT EXISTS lang text; -CREATE UNIQUE INDEX IF NOT EXISTS unique_settings +ALTER TABLE setting + ADD CONSTRAINT non_empty_lang + CHECK (lang <> ''); + +CREATE UNIQUE INDEX unique_settings ON setting (name, coalesce(lang, '')); diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index 429dee9b14a..760f39d7170 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -46,7 +46,7 @@

+ var="license" itemLabel="#{DatasetUtil:getLocalizedLicenseDetails(license.name, '.name')}" itemValue="#{license}"/> @@ -55,8 +55,8 @@

- - #{termsOfUseAndAccess.license.name} + + #{DatasetUtil:getLocalizedLicenseDetails(termsOfUseAndAccess.license.name,'.name')}

diff --git a/src/main/webapp/datasetLicenseInfoFragment.xhtml b/src/main/webapp/datasetLicenseInfoFragment.xhtml index 797d20b8a25..e7a393a8ae7 100644 --- a/src/main/webapp/datasetLicenseInfoFragment.xhtml +++ b/src/main/webapp/datasetLicenseInfoFragment.xhtml @@ -30,12 +30,12 @@ xmlns:jsf="http://xmlns.jcp.org/jsf">
+ jsf:rendered="#{!empty DatasetUtil:getLocalizedLicenseDetails(DatasetPage.workingVersion.termsOfUseAndAccess.license.name,'.description')} }">
- +
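As a rough, standalone illustration of the key derivation used by getLocalizedLicenseDetails() above (the helper below simply mirrors the lowercase/underscore logic and is not part of the codebase):

public class LicenseKeyDemo {
    // Mirrors the key construction above: lowercase the license name,
    // replace spaces with underscores, then append the requested key part.
    static String toKey(String licenseName, String keyPart) {
        return "license." + licenseName.toLowerCase().replace(" ", "_") + keyPart;
    }

    public static void main(String[] args) {
        System.out.println(toKey("CC0 1.0", ".name"));          // license.cc0_1.0.name
        System.out.println(toKey("CC BY 4.0", ".description")); // license.cc_by_4.0.description
    }
}

If no matching key exists in the License properties file, the code in the patch above falls back to the license name itself.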
From 4c25878b6d7e903274b5d25623548867783acf17 Mon Sep 17 00:00:00 2001 From: cstr Date: Thu, 3 Nov 2022 15:02:44 +0800 Subject: [PATCH 337/608] Update citation.properties --- src/main/java/propertyFiles/citation.properties | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index ef8b44d7114..f35ede79b50 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -251,7 +251,7 @@ controlledvocabulary.subject.social_sciences=Social Sciences controlledvocabulary.subject.other=Other controlledvocabulary.publicationIDType.ark=ark controlledvocabulary.publicationIDType.arxiv=arXiv -controlledvocabulary.publicationIDType.cstr=CSTR +controlledvocabulary.publicationIDType.cstr=cstr controlledvocabulary.publicationIDType.bibcode=bibcode controlledvocabulary.publicationIDType.doi=doi controlledvocabulary.publicationIDType.ean13=ean13 @@ -346,7 +346,7 @@ controlledvocabulary.language.galician=Galician controlledvocabulary.language.georgian=Georgian controlledvocabulary.language.german=German controlledvocabulary.language.greek_(modern)=Greek (modern) -controlledvocabulary.language.guarani=Guaraní +controlledvocabulary.language.guarani=Guaraní controlledvocabulary.language.gujarati=Gujarati controlledvocabulary.language.haitian,_haitian_creole=Haitian, Haitian Creole controlledvocabulary.language.hausa=Hausa @@ -406,7 +406,7 @@ controlledvocabulary.language.navajo,_navaho=Navajo, Navaho controlledvocabulary.language.northern_ndebele=Northern Ndebele controlledvocabulary.language.nepali=Nepali controlledvocabulary.language.ndonga=Ndonga -controlledvocabulary.language.norwegian_bokmal=Norwegian Bokmål +controlledvocabulary.language.norwegian_bokmal=Norwegian BokmÃ¥l controlledvocabulary.language.norwegian_nynorsk=Norwegian Nynorsk controlledvocabulary.language.norwegian=Norwegian controlledvocabulary.language.nuosu=Nuosu @@ -468,7 +468,7 @@ controlledvocabulary.language.urdu=Urdu controlledvocabulary.language.uzbek=Uzbek controlledvocabulary.language.venda=Venda controlledvocabulary.language.vietnamese=Vietnamese -controlledvocabulary.language.volapuk=Volapük +controlledvocabulary.language.volapuk=Volapük controlledvocabulary.language.walloon=Walloon controlledvocabulary.language.welsh=Welsh controlledvocabulary.language.wolof=Wolof @@ -478,4 +478,4 @@ controlledvocabulary.language.yiddish=Yiddish controlledvocabulary.language.yoruba=Yoruba controlledvocabulary.language.zhuang,_chuang=Zhuang, Chuang controlledvocabulary.language.zulu=Zulu -controlledvocabulary.language.not_applicable=Not applicable \ No newline at end of file +controlledvocabulary.language.not_applicable=Not applicable From b8327eec4897bfef2366525651e6174d6b580cda Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Nov 2022 09:16:28 -0400 Subject: [PATCH 338/608] 9121 fix for search display with ext. CVoc --- src/main/webapp/search-include-fragment.xhtml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml index f70356aa393..a6e344afb8c 100644 --- a/src/main/webapp/search-include-fragment.xhtml +++ b/src/main/webapp/search-include-fragment.xhtml @@ -594,7 +594,15 @@
- + + + + + + + + +
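The xhtml changes above are hard to read in patch form; their intent, sketched in rough terms (the names below are hypothetical, not the actual page model), is to prefer a cached human-readable label over the raw term URI when a field is backed by an external vocabulary:

import java.util.Map;

public class CVocDisplaySketch {
    // Hypothetical illustration only: cvocLabels stands in for whatever
    // cache maps external-vocabulary term URIs to human-readable labels.
    static String displayValue(String rawValue, Map<String, String> cvocLabels) {
        // For CVoc-backed fields the stored value is a term URI; show the
        // label when one is known, otherwise fall back to the raw value.
        return cvocLabels.getOrDefault(rawValue, rawValue);
    }

    public static void main(String[] args) {
        Map<String, String> labels =
                Map.of("https://vocab.example.org/term/1234", "Example Term");
        System.out.println(displayValue("https://vocab.example.org/term/1234", labels)); // Example Term
        System.out.println(displayValue("plain keyword", labels));                       // plain keyword
    }
}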
From e068cabe90effe55e3078101dc85e91de2310eff Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 3 Nov 2022 11:38:08 -0400 Subject: [PATCH 339/608] correction --- .../java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 75cde7b4bd9..fecfdc2bcfb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -584,11 +584,11 @@ public static String getLocalizedLicenseDetails(String licenseName,String keyPar localizedLicenseValue = BundleUtil.getStringFromPropertyFile(key, "License"); } catch (Exception e) { - localizedLicenseValue = licenseName.toLowerCase(); + localizedLicenseValue = licenseName; } if (localizedLicenseValue == null) { - localizedLicenseValue = licenseName.toLowerCase() ; + localizedLicenseValue = licenseName ; } return localizedLicenseValue; From 57dd54ae807d0d30d83b5d5a6064ab79c820f46c Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 3 Nov 2022 12:00:55 -0400 Subject: [PATCH 340/608] added additional doc --- doc/sphinx-guides/source/installation/config.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 3cdac253cb3..d2ef3a165cf 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1044,6 +1044,14 @@ On a new Dataverse installation, users may select from the following licenses or (Note that existing Dataverse installations which are upgraded from 5.9 or previous will only offer CC0 1.0, added automatically during the upgrade to version 5.10.) +If the Dataverse Installation supports multiple languages, the license name/description translations should be added to the ``License`` properties files. (See :ref:`i18n` for more on properties files and internationalization in general.) +To create the key, convert the license name to lowercase and replace spaces with underscores. + +Example:: + + license.cc0_1.0.description=Creative Commons CC0 1.0 Universal Public Domain Dedication. + license.cc0_1.0.name=CC0 1.0 + You have a lot of control over which licenses and terms are available. You can remove licenses and add new ones. You can decide which license is the default. You can remove "Custom Dataset Terms" as an option. You can remove all licenses and make "Custom Dataset Terms" the only option. Before making changes, you are encouraged to read the :ref:`license-terms` section of the User Guide about why CC0 is the default and what the "Custom Dataset Terms" option allows. From ee1e0c8019d1cec82750f4e9454fd6ca264a520d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 3 Nov 2022 17:59:06 +0100 Subject: [PATCH 341/608] build(ct-base): switch to Payara 5.2022.4 The upgrade to 5.2022.3 made Dataverse deployments fail because the postboot script deployment method was broken. This has been fixed with 5.2022.4, which is why we use this version now. 
--- modules/dataverse-parent/pom.xml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 4ffc5941278..fe50601d583 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -337,7 +337,11 @@ ct - + + 5.2022.4 From 7d9327edbf194049c1233b12fed6c0ade8dc518d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Nov 2022 17:39:55 -0400 Subject: [PATCH 342/608] Refactored permissions checks and fixed workflow token access --- .../edu/harvard/iq/dataverse/api/Access.java | 334 +++++------------- 1 file changed, 96 insertions(+), 238 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index abeedf23b59..321b3ebfab6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -187,9 +187,6 @@ public class Access extends AbstractApiBean { @Inject MakeDataCountLoggingServiceBean mdcLogService; - - private static final String API_KEY_HEADER = "X-Dataverse-key"; - //@EJB // TODO: @@ -197,23 +194,19 @@ public class Access extends AbstractApiBean { @Path("datafile/bundle/{fileId}") @GET @Produces({"application/zip"}) - public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long fileMetadataId,@QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long fileMetadataId,@QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { GuestbookResponse gbr = null; DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); + checkAuthorization(df); if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released - User apiTokenUser = findAPITokenUser(apiToken); + User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); guestbookResponseService.save(gbr); MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); @@ -278,7 +271,7 @@ private DataFile findDataFileOrDieWrapper(String fileId){ @Path("datafile/{fileId:.+}") @GET @Produces({"application/xml"}) - public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws 
NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { // check first if there's a trailing slash, and chop it: while (fileId.lastIndexOf('/') == fileId.length() - 1) { @@ -303,20 +296,16 @@ public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs throw new NotFoundException(errorMessage); // (nobody should ever be using this API on a harvested DataFile)! } - - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - + + // This will throw a ForbiddenException if access isn't authorized: + checkAuthorization(df); + if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released - User apiTokenUser = findAPITokenUser(apiToken); + User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); } - - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); - + DownloadInfo dInfo = new DownloadInfo(df); logger.fine("checking if thumbnails are supported on this file."); @@ -532,11 +521,10 @@ public String tabularDatafileMetadataDDI(@PathParam("fileId") String fileId, @Q @Path("datafile/{fileId}/auxiliary") @GET public Response listDatafileMetadataAux(@PathParam("fileId") String fileId, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { - return listAuxiliaryFiles(fileId, null, apiToken, uriInfo, headers, response); + return listAuxiliaryFiles(fileId, null, uriInfo, headers, response); } /* * GET method for retrieving a list auxiliary files associated with @@ -547,26 +535,21 @@ public Response listDatafileMetadataAux(@PathParam("fileId") String fileId, @GET public Response listDatafileMetadataAuxByOrigin(@PathParam("fileId") String fileId, @PathParam("origin") String origin, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { - return listAuxiliaryFiles(fileId, origin, apiToken, uriInfo, headers, response); + return listAuxiliaryFiles(fileId, origin, uriInfo, headers, response); } - private Response listAuxiliaryFiles(String fileId, String origin, String apiToken, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) { + private Response listAuxiliaryFiles(String fileId, String origin, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) { DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - List auxFileList = auxiliaryFileService.findAuxiliaryFiles(df, origin); if (auxFileList == null || auxFileList.isEmpty()) { throw new NotFoundException("No Auxiliary files exist for datafile " + fileId + (origin==null ? 
"": " and the specified origin")); } - boolean isAccessAllowed = isAccessAuthorized(df, apiToken); + boolean isAccessAllowed = isAccessAuthorized(df); JsonArrayBuilder jab = Json.createArrayBuilder(); auxFileList.forEach(auxFile -> { if (isAccessAllowed || auxFile.getIsPublic()) { @@ -594,17 +577,12 @@ private Response listAuxiliaryFiles(String fileId, String origin, String apiToke public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId, @PathParam("formatTag") String formatTag, @PathParam("formatVersion") String formatVersion, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - DownloadInfo dInfo = new DownloadInfo(df); boolean publiclyAvailable = false; @@ -654,7 +632,7 @@ public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId // as defined for the DataFile itself), and will throw a ForbiddenException // if access is denied: if (!publiclyAvailable) { - checkAuthorization(df, apiToken); + checkAuthorization(df); } return downloadInstance; @@ -670,16 +648,16 @@ public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId @POST @Consumes("text/plain") @Produces({ "application/zip" }) - public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } @Path("dataset/{id}") @GET @Produces({"application/zip"}) - public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { try { User user = findUserOrDie(); DataverseRequest req = createDataverseRequest(user); @@ -693,7 +671,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist // We don't want downloads from Draft versions to be counted, // so we are setting the gbrecs (aka "do not write guestbook response") // variable accordingly: - return downloadDatafiles(fileIds, true, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, true, uriInfo, headers, response); } } @@ -714,7 +692,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist } String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } 
catch (WrappedResponse wr) { return wr.getResponse(); } @@ -763,7 +741,7 @@ public Command handleLatestPublished() { if (dsv.isDraft()) { gbrecs = true; } - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -784,11 +762,11 @@ private static String getFileIdsAsCommaSeparated(List fileMetadata @Path("datafiles/{fileIds}") @GET @Produces({"application/zip"}) - public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } - private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, String apiTokenParam, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { final long zipDownloadSizeLimit = systemConfig.getZipDownloadLimit(); logger.fine("setting zip download size limit to " + zipDownloadSizeLimit + " bytes."); @@ -810,11 +788,7 @@ private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBRespon String customZipServiceUrl = settingsService.getValueForKey(SettingsServiceBean.Key.CustomZipDownloadServiceUrl); boolean useCustomZipService = customZipServiceUrl != null; - String apiToken = (apiTokenParam == null || apiTokenParam.equals("")) - ? 
headers.getHeaderString(API_KEY_HEADER) - : apiTokenParam; - - User apiTokenUser = findAPITokenUser(apiToken); //for use in adding gb records if necessary + User apiTokenUser = findAPITokenUser(); //for use in adding gb records if necessary Boolean getOrig = false; for (String key : uriInfo.getQueryParameters().keySet()) { @@ -827,7 +801,7 @@ private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBRespon if (useCustomZipService) { URI redirect_uri = null; try { - redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiToken, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); + redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); } catch (WebApplicationException wae) { throw wae; } @@ -859,7 +833,7 @@ public void write(OutputStream os) throws IOException, logger.fine("token: " + fileIdParams[i]); Long fileId = null; try { - fileId = new Long(fileIdParams[i]); + fileId = Long.parseLong(fileIdParams[i]); } catch (NumberFormatException nfe) { fileId = null; } @@ -867,7 +841,7 @@ public void write(OutputStream os) throws IOException, logger.fine("attempting to look up file id " + fileId); DataFile file = dataFileService.find(fileId); if (file != null) { - if (isAccessAuthorized(file, apiToken)) { + if (isAccessAuthorized(file)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); //downloadInstance.addDataFile(file); @@ -1436,8 +1410,8 @@ public Response requestFileAccess(@PathParam("id") String fileToRequestAccessId, List args = Arrays.asList(wr.getLocalizedMessage()); return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.fileAccess.failure.noUser", args)); } - - if (isAccessAuthorized(dataFile, getRequestApiKey())) { + //Already have access + if (isAccessAuthorized(dataFile)) { return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.requestAccess.failure.invalidRequest")); } @@ -1708,15 +1682,15 @@ public Response rejectFileAccess(@PathParam("id") String fileToRequestAccessId, // checkAuthorization is a convenience method; it calls the boolean method // isAccessAuthorized(), the actual workhorse, tand throws a 403 exception if not. - private void checkAuthorization(DataFile df, String apiToken) throws WebApplicationException { + private void checkAuthorization(DataFile df) throws WebApplicationException { - if (!isAccessAuthorized(df, apiToken)) { + if (!isAccessAuthorized(df)) { throw new ForbiddenException(); } } - private boolean isAccessAuthorized(DataFile df, String apiToken) { + private boolean isAccessAuthorized(DataFile df) { // First, check if the file belongs to a released Dataset version: boolean published = false; @@ -1787,37 +1761,41 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { } } - if (!restricted && !embargoed) { - // And if they are not published, they can still be downloaded, if the user + + + //The one case where we don't need to check permissions + if (!restricted && !embargoed && published) { + // If they are not published, they can still be downloaded, if the user // has the permission to view unpublished versions! 
(this case will // be handled below) - if (published) { - return true; - } + return true; } + //For permissions check decide if we havce a session user, or an API user User user = null; /** * Authentication/authorization: - * - * note that the fragment below - that retrieves the session object - * and tries to find the user associated with the session - is really - * for logging/debugging purposes only; for practical purposes, it - * would be enough to just call "permissionService.on(df).has(Permission.DownloadFile)" - * and the method does just that, tries to authorize for the user in - * the current session (or guest user, if no session user is available): */ - if (session != null) { + User apiTokenUser = null; + //If we get a non-GuestUser from findUserOrDie, use it. Otherwise, check the session + try { + logger.fine("calling apiTokenUser = findUserOrDie()..."); + apiTokenUser = findUserOrDie(); + } catch (WrappedResponse wr) { + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + } + + if ((apiTokenUser instanceof GuestUser) && session != null) { if (session.getUser() != null) { - if (session.getUser().isAuthenticated()) { - user = session.getUser(); - } else { + user = session.getUser(); + apiTokenUser=null; + //Fine logging + if (!session.getUser().isAuthenticated()) { logger.fine("User associated with the session is not an authenticated user."); if (session.getUser() instanceof PrivateUrlUser) { logger.fine("User associated with the session is a PrivateUrlUser user."); - user = session.getUser(); } if (session.getUser() instanceof GuestUser) { logger.fine("User associated with the session is indeed a guest user."); @@ -1829,154 +1807,41 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { } else { logger.fine("Session is null."); } - - User apiTokenUser = null; - - if ((apiToken != null)&&(apiToken.length()!=64)) { - // We'll also try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling apiTokenUser = findUserOrDie()..."); - apiTokenUser = findUserOrDie(); - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - } - - if (apiTokenUser == null) { - logger.warning("API token-based auth: Unable to find a user with the API token provided."); - } + //If we don't have a user, nothing more to do. 
(Note session could have returned GuestUser) + if (user == null && apiTokenUser == null) { + logger.warning("Unable to find a user via session or with a token."); + return false; } - - // OK, let's revisit the case of non-restricted files, this time in - // an unpublished version: + + // OK, let's revisit the case of non-restricted files, this time in + // an unpublished version: // (if (published) was already addressed above) - - if (!restricted && !embargoed) { + + DataverseRequest dvr = null; + if (apiTokenUser != null) { + dvr = createDataverseRequest(apiTokenUser); + } else { + // used in JSF context, user may be Guest + dvr = dvRequestService.getDataverseRequest(); + } + if (!published) { // and restricted or embargoed (implied by earlier processing) // If the file is not published, they can still download the file, if the user // has the permission to view unpublished versions: - - if ( user != null ) { - // used in JSF context - if (permissionService.requestOn(dvRequestService.getDataverseRequest(), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - // it's not unthinkable, that a null user (i.e., guest user) could be given - // the ViewUnpublished permission! - logger.log(Level.FINE, "Session-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", user.getIdentifier()); - return true; - } - } - - if (apiTokenUser != null) { - // used in an API context - if (permissionService.requestOn( createDataverseRequest(apiTokenUser), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "Token-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", apiTokenUser.getIdentifier()); - return true; - } - } - // last option - guest user in either contexts - // Guset user is impled by the code above. - if ( permissionService.requestOn(dvRequestService.getDataverseRequest(), df.getOwner()).has(Permission.ViewUnpublishedDataset) ) { + if (permissionService.requestOn(dvr, df.getOwner()).has(Permission.ViewUnpublishedDataset)) { + // it's not unthinkable, that a GuestUser could be given + // the ViewUnpublished permission! + logger.log(Level.FINE, + "Session-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", + dvr.getUser().getIdentifier()); return true; } - - } else { - - // OK, this is a restricted and/or embargoed file. - - boolean hasAccessToRestrictedBySession = false; - boolean hasAccessToRestrictedByToken = false; - - if (permissionService.on(df).has(Permission.DownloadFile)) { - // Note: PermissionServiceBean.on(Datafile df) will obtain the - // User from the Session object, just like in the code fragment - // above. That's why it's not passed along as an argument. 
- hasAccessToRestrictedBySession = true; - } else if (apiTokenUser != null && permissionService.requestOn(createDataverseRequest(apiTokenUser), df).has(Permission.DownloadFile)) { - hasAccessToRestrictedByToken = true; - } - - if (hasAccessToRestrictedBySession || hasAccessToRestrictedByToken) { - if (published) { - if (hasAccessToRestrictedBySession) { - if (user != null) { - logger.log(Level.FINE, "Session-based auth: user {0} is granted access to the restricted, published datafile.", user.getIdentifier()); - } else { - logger.fine("Session-based auth: guest user is granted access to the restricted, published datafile."); - } - } else { - logger.log(Level.FINE, "Token-based auth: user {0} is granted access to the restricted, published datafile.", apiTokenUser.getIdentifier()); - } - return true; - } else { - // if the file is NOT published, we will let them download the - // file ONLY if they also have the permission to view - // unpublished versions: - // Note that the code below does not allow a case where it is the - // session user that has the permission on the file, and the API token - // user with the ViewUnpublished permission, or vice versa! - if (hasAccessToRestrictedBySession) { - if (permissionService.on(df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - if (user != null) { - logger.log(Level.FINE, "Session-based auth: user {0} is granted access to the restricted, unpublished datafile.", user.getIdentifier()); - } else { - logger.fine("Session-based auth: guest user is granted access to the restricted, unpublished datafile."); - } - return true; - } - } else { - if (apiTokenUser != null && permissionService.requestOn(createDataverseRequest(apiTokenUser), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "Token-based auth: user {0} is granted access to the restricted, unpublished datafile.", apiTokenUser.getIdentifier()); - return true; - } - } - } - } - } + } else { // published and restricted and/or embargoed - - if ((apiToken != null)) { - // Will try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling user = findUserOrDie()..."); - user = findUserOrDie(); - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - } - - if (user == null) { - logger.warning("API token-based auth: Unable to find a user with the API token provided."); - return false; - } - - - //Doesn't this ~duplicate logic above - if so, if there's a way to get here, I think it still works for embargoed files (you only get access if you have download permissions, and, if not published, also view unpublished) - if (permissionService.requestOn(createDataverseRequest(user), df).has(Permission.DownloadFile)) { - if (published) { - logger.log(Level.FINE, "API token-based auth: User {0} has rights to access the datafile.", user.getIdentifier()); - //Same case as line 1809 (and part of 1708 though when published you don't need the DownloadFile permission) - return true; - } else { - // if the file is NOT published, we will let them download the - // file ONLY if they also have the permission to view - // unpublished versions: - if (permissionService.requestOn(createDataverseRequest(user), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "API token-based auth: User {0} has rights to access the (unpublished) datafile.", user.getIdentifier()); - //Same case as line 1843? 
- return true; - } else { - logger.log(Level.FINE, "API token-based auth: User {0} is not authorized to access the (unpublished) datafile.", user.getIdentifier()); - } - } - } else { - logger.log(Level.FINE, "API token-based auth: User {0} is not authorized to access the datafile.", user.getIdentifier()); + if (permissionService.requestOn(dvr, df).has(Permission.DownloadFile)) { + return true; } - - return false; - } - + } if (user != null) { logger.log(Level.FINE, "Session-based auth: user {0} has NO access rights on the requested datafile.", user.getIdentifier()); } @@ -1984,37 +1849,30 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { if (apiTokenUser != null) { logger.log(Level.FINE, "Token-based auth: user {0} has NO access rights on the requested datafile.", apiTokenUser.getIdentifier()); } - - if (user == null && apiTokenUser == null) { - logger.fine("Unauthenticated access: No guest access to the datafile."); - } - return false; } - private User findAPITokenUser(String apiToken) { + private User findAPITokenUser() { User apiTokenUser = null; - - if ((apiToken != null) && (apiToken.length() != 64)) { - // We'll also try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling apiTokenUser = findUserOrDie()..."); - apiTokenUser = findUserOrDie(); - return apiTokenUser; - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - return null; + try { + logger.fine("calling apiTokenUser = findUserOrDie()..."); + apiTokenUser = findUserOrDie(); + if(apiTokenUser instanceof GuestUser) { + if(session!=null && session.getUser()!=null) { + //The apiTokenUser, if set, will override the sessionUser in permissions calcs, so set it to null if we have a session user + apiTokenUser=null; + } } - + return apiTokenUser; + } catch (WrappedResponse wr) { + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + return null; } - return apiTokenUser; } - private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, String apiToken, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { + private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { String zipServiceKey = null; Timestamp timestamp = null; @@ -2031,7 +1889,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, for (int i = 0; i < fileIdParams.length; i++) { Long fileId = null; try { - fileId = new Long(fileIdParams[i]); + fileId = Long.parseLong(fileIdParams[i]); validIdCount++; } catch (NumberFormatException nfe) { fileId = null; @@ -2040,7 +1898,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, DataFile file = dataFileService.find(fileId); if (file != null) { validFileCount++; - if (isAccessAuthorized(file, apiToken)) { + if (isAccessAuthorized(file)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); if (donotwriteGBResponse != true && file.isReleased()) { GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), file, session, apiTokenUser); From d10b15439f09ed5929b934777caaba299152040f Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 4 Nov 2022 16:55:24 +0100 Subject: [PATCH 343/608] 
added documentation --- doc/release-notes/9130-cleanup-storage.md | 3 +++ doc/sphinx-guides/source/api/native-api.rst | 28 +++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 doc/release-notes/9130-cleanup-storage.md diff --git a/doc/release-notes/9130-cleanup-storage.md b/doc/release-notes/9130-cleanup-storage.md new file mode 100644 index 00000000000..71387a92db2 --- /dev/null +++ b/doc/release-notes/9130-cleanup-storage.md @@ -0,0 +1,3 @@ +### Support for cleaning up files in datasets' storage + +Experimental feature: all the files stored in the Dataset storage location that are not in the file list of that Dataset can be removed with the new native API call (/api/datasets/$id/cleanStorage). \ No newline at end of file diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 6d68d648cb3..260c5cc7765 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1511,6 +1511,34 @@ The fully expanded example above (without environment variables) looks like this curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' +.. _cleanup-storage-api: + +Cleanup storage of a Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your Dataverse installation has been configured to support direct uploads, or in some other situations, +you could end up with some files in the storage of a dataset that are not linked to that dataset directly. Most commonly, this could +happen when an upload fails in the middle of a transfer: for example, if a user does a UI direct upload and leaves the page without hitting cancel or save, +Dataverse doesn't know about the files and doesn't clean them up. Similarly, in the direct upload API, if the final /addFiles call isn't done, the files are abandoned. + +You might also want to remove cached export files or some temporary files, thumbnails, etc. + +All the files stored in the Dataset storage location that are not in the file list of that Dataset can be removed, as shown in the example below. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/J8SJZB + + curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/:persistentId/cleanStorage?persistentId=$PERSISTENT_ID" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X GET https://demo.dataverse.org/api/datasets/:persistentId/cleanStorage?persistentId=doi:10.5072/FK2/J8SJZB + Report the data (file) size of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 05345ba39688291d028af40497b1ada4368a1418 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 4 Nov 2022 17:03:51 +0100 Subject: [PATCH 344/608] feat(ct-base): make buildx/BuildKit use a shared state for builds Should speed up recurring builds a bit. 
---
 modules/container-base/pom.xml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml
index 67e2c2f9911..f8b59dcecaa 100644
--- a/modules/container-base/pom.xml
+++ b/modules/container-base/pom.xml
@@ -97,6 +97,7 @@
                                 linux/arm64
                                 linux/amd64
                             
+                            ${project.build.directory}/buildx-state
                             Dockerfile

From e261e3701b1af286d5901e1a82f84fff525dcd74 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Fri, 4 Nov 2022 17:09:27 +0100
Subject: [PATCH 345/608] feat(ct-base): switch /docroot to /dv and add
 volumes #8932

- Instead of a /docroot, add a more generic /dv which is owned by
  payara:payara and can be used to either store data in a single volume
  using subfolders or use subfolders with different backing volumes.
  Either way, data is not written to the overlay FS this way. (As long as
  an app image points to this location)
- Also define /secrets and /dumps as volumes, so data flowing into these
  locations is again not added to the overlay FS (which might cause severe
  damage in case of heap dumps!)
- Document the different locations in the base image guide.
- Remove the /docroot workaround for uploaded files. This will be solved
  at application level (either by moving the workaround there) or
  https://github.com/IQSS/dataverse/pull/8983
---
 .../source/container/base-image.rst           | 48 +++++++++++++++----
 .../container-base/src/main/docker/Dockerfile | 19 +++-----
 2 files changed, 45 insertions(+), 22 deletions(-)

diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst
index 197f4175538..8cf6af1f904 100644
--- a/doc/sphinx-guides/source/container/base-image.rst
+++ b/doc/sphinx-guides/source/container/base-image.rst
@@ -218,7 +218,16 @@ Locations
 +++++++++
 These environment variables represent certain locations and might be reused in your scripts etc.
-These variables aren't meant to be reconfigurable and reflect state in the filesystem layout!
+All of these variables aren't meant to be reconfigurable and reflect state in the filesystem layout!
+
+**Writeable at build time:**
+
+The overlay filesystem of Docker and other container technologies is not meant to be used for performance-critical I/O.
+You should avoid *writing* data anywhere in the file tree at runtime, except for well-known locations with mounted
+volumes backing them (see below).
+
+The locations below are meant to be written to when you build a container image, either this base or anything
+building upon it. You can also use these for references in scripts, etc.

 .. list-table::
   :align: left
@@ -245,10 +254,35 @@ These variables aren't meant to be reconfigurable and reflect state in the files
   * - ``DEPLOY_DIR``
     - ``${HOME_DIR}/deployments``
     - Any EAR or WAR file, exploded WAR directory etc are autodeployed on start
-  * - ``DOCROOT_DIR``
-    - ``/docroot``
-    - Mount a volume here to store i18n language bundle files, sitemaps, images for Dataverse collections, logos,
-      custom themes and stylesheets, etc here. You might need to replicate this data or place on shared file storage.
+  * - ``DOMAIN_DIR``
+    - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}``
+    - Path to the root of the Payara domain that applications will be deployed into. Usually ``${DOMAIN_NAME}`` will be ``domain1``.
+
+
+**Writeable at runtime:**
+
+The locations below are defined as `Docker volumes `_ by the base image.
+They will by default get backed by an "anonymous volume", but you can (and should) bind-mount a host directory or
+named Docker volume in these places to avoid data loss, gain performance and/or use a network file system.
+
+**Notes:**
+1. On Kubernetes you still need to provide volume definitions for these places in your deployment objects!
+2. You should not write data into these locations at build time - it will be shadowed by the mounted volumes!
+
+.. list-table::
+  :align: left
+  :width: 100
+  :widths: 10 10 50
+  :header-rows: 1
+
+  * - Env. variable
+    - Value
+    - Description
+  * - ``STORAGE_DIR``
+    - ``/dv``
+    - This place is writeable by the Payara user, making it usable as a place to store research data, customizations,
+      or other data. Images inheriting the base image should create distinct folders here, backed by different
+      mounted volumes.
   * - ``SECRETS_DIR``
     - ``/secrets``
     - Mount secrets or other here, being picked up automatically by
@@ -258,10 +292,6 @@ These variables aren't meant to be reconfigurable and reflect state in the files
     - ``/dumps``
     - Default location where heap dumps will be stored (see above).
       You should mount some storage here (disk or ephemeral).
-  * - ``DOMAIN_DIR``
-    - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}``
-    - Path to root of the Payara domain applications will be deployed into. Usually ``${DOMAIN_NAME}`` will be ``domain1``.
-

 Exposed Ports
diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile
index cafeb2ffb59..07968e92359 100644
--- a/modules/container-base/src/main/docker/Dockerfile
+++ b/modules/container-base/src/main/docker/Dockerfile
@@ -1,4 +1,4 @@
-# Copyright 2019 Forschungszentrum Jülich GmbH
+# Copyright 2022 Forschungszentrum Jülich GmbH
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -38,7 +38,7 @@ ENV PAYARA_DIR="${HOME_DIR}/appserver" \
     SCRIPT_DIR="${HOME_DIR}/scripts" \
     CONFIG_DIR="${HOME_DIR}/config" \
     DEPLOY_DIR="${HOME_DIR}/deployments" \
-    DOCROOT_DIR="/docroot" \
+    STORAGE_DIR="/dv" \
     SECRETS_DIR="/secrets" \
     DUMPS_DIR="/dumps" \
     PASSWORD_FILE="${HOME_DIR}/passwordFile" \
@@ -73,17 +73,19 @@ ARG GID=1000
 USER root
 WORKDIR /
 SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
+# Mark these directories as mutable data containers to avoid cluttering the image's overlayfs at runtime.
+VOLUME ${STORAGE_DIR} ${SECRETS_DIR} ${DUMPS_DIR}
RUN < Date: Fri, 4 Nov 2022 17:10:47 +0100
Subject: [PATCH 346/608] ci(ct-base): switch some steps to run on push or
 schedule #8932

Instead of only running the steps to push images to Docker Hub on a Git
push event, also make it possible to run them on anything not being a
pull_request event.
(Like a schedule) --- .github/workflows/container_base_push.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 82c7a376ae0..2520a7e9257 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -53,18 +53,18 @@ jobs: - name: Build base container image with local architecture run: mvn -f modules/container-base -Pct package - - if: ${{ github.event_name == 'push' }} # run only if this is a push - PRs have no access to secrets + - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - PRs have no access to secrets name: Log in to the Container registry uses: docker/login-action@v1 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - if: ${{ github.event_name == 'push' }} # run only if this is a push - multi-arch makes no sense with PR + - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - multi-arch makes no sense with PR name: Set up QEMU for multi-arch builds uses: docker/setup-qemu-action@v2 - name: Re-set image tag based on branch if: ${{ github.ref == 'master' }} run: echo "IMAGE_TAG=release" - - if: ${{ github.event_name == 'push' }} # run only if this is a push - tag push will only succeed in upstream + - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - tag push will only succeed in upstream name: Deploy multi-arch base container image to Docker Hub run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }} -Ddocker.registry=${{ env.REGISTRY }} From 9ba760d67456cfebc369d7f7d83e2d2dc7f3c505 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 4 Nov 2022 18:23:16 +0100 Subject: [PATCH 347/608] replaced listAllFiles and deleteFile with cleanUp in the StorageIO interface --- .../edu/harvard/iq/dataverse/api/Datasets.java | 13 ++++++------- .../iq/dataverse/dataaccess/FileAccessIO.java | 16 +++++++++++++--- .../iq/dataverse/dataaccess/InputStreamIO.java | 9 ++------- .../dataaccess/RemoteOverlayAccessIO.java | 10 +++------- .../iq/dataverse/dataaccess/S3AccessIO.java | 17 +++++++++++++---- .../iq/dataverse/dataaccess/StorageIO.java | 5 ++--- .../iq/dataverse/dataaccess/SwiftAccessIO.java | 18 ++++++++++++++---- 7 files changed, 53 insertions(+), 35 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 0cdb2b3a73f..38017514575 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -114,6 +114,7 @@ import java.time.LocalDateTime; import java.util.*; import java.util.concurrent.*; +import java.util.function.Predicate; import java.time.ZoneId; import java.time.format.DateTimeFormatter; import java.util.Map.Entry; @@ -2546,13 +2547,11 @@ public Response cleanStorage(@PathParam("id") String idSupplied) { } } StorageIO datasetIO = DataAccess.getStorageIO(dataset); - List allDatasetFiles = datasetIO.listAllFiles(); - for (String f : allDatasetFiles) { - if (!files.contains(f)) { - datasetIO.deleteFile(f); - deleted.add(f); - } - } + Predicate filter = f -> { + return !files.contains(f); + }; + + deleted.addAll(datasetIO.cleanUp(filter)); } catch (IOException ex) { logger.log(Level.SEVERE, null, ex); return error(Response.Status.INTERNAL_SERVER_ERROR, "IOException! Serious Error! 
See administrator!");
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java
index 2bb3abf03a6..cc72a9cfb02 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java
@@ -33,9 +33,11 @@
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.List;
+import java.util.function.Predicate;
 import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;

 // Dataverse imports:
 import edu.harvard.iq.dataverse.DataFile;
@@ -684,7 +686,7 @@ protected static boolean isValidIdentifier(String driverId, String storageId) {
         return true;
     }

-    public List listAllFiles() throws IOException {
+    private List listAllFiles() throws IOException {
         Dataset dataset = this.getDataset();
         if (dataset == null) {
             throw new IOException("This FileAccessIO object hasn't been properly initialized.");
@@ -708,8 +710,7 @@ public List listAllFiles() throws IOException {
         return res;
     }

-    @Override
-    public void deleteFile(String fileName) throws IOException {
+    private void deleteFile(String fileName) throws IOException {
         Dataset dataset = this.getDataset();
         if (dataset == null) {
             throw new IOException("This FileAccessIO object hasn't been properly initialized.");
@@ -724,4 +725,13 @@ public void deleteFile(String fileName) throws IOException {
         Files.delete(p);
     }

+    @Override
+    public List cleanUp(Predicate filter) throws IOException {
+        List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList());
+        for (String f : toDelete) {
+            this.deleteFile(f);
+        }
+        return toDelete;
+    }
+
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java
index 1235b386fe9..2a867bddcac 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java
@@ -14,6 +14,7 @@
 import java.nio.channels.WritableByteChannel;
 import java.nio.file.Path;
 import java.util.List;
+import java.util.function.Predicate;
 import java.util.logging.Logger;

 /**
@@ -160,14 +161,8 @@ public void revertBackupAsAux(String auxItemTag) throws IOException {
     }

     @Override
-    public List listAllFiles() throws IOException {
+    public List cleanUp(Predicate filter) throws IOException {
         throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver.");
     }
-
-
-    @Override
-    public void deleteFile(String fileName) throws IOException {
-        throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver.");
-    }
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java
index b7fb4c86c7c..22373fdfee0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java
@@ -24,6 +24,7 @@
 import java.security.KeyStoreException;
 import java.security.NoSuchAlgorithmException;
 import java.util.List;
+import java.util.function.Predicate;
 import java.util.logging.Logger;

 import org.apache.http.Header;
@@ -630,14 +631,9 @@ protected static boolean isValidIdentifier(String driverId, String storageId)
{ public static String getBaseStoreIdFor(String driverId) { return System.getProperty("dataverse.files." + driverId + ".base-store"); } - - @Override - public List listAllFiles() throws IOException { - return baseStore.listAllFiles(); - } @Override - public void deleteFile(String fileName) throws IOException { - baseStore.deleteFile(fileName); + public List cleanUp(Predicate filter) throws IOException { + return baseStore.cleanUp(filter); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 3796d7f0ce9..8dc93361375 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -60,7 +60,10 @@ import java.util.HashMap; import java.util.List; import java.util.Random; +import java.util.function.Predicate; import java.util.logging.Logger; +import java.util.stream.Collectors; + import org.apache.commons.io.IOUtils; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; @@ -1308,8 +1311,7 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { - @Override - public List listAllFiles() throws IOException { + private List listAllFiles() throws IOException { if (!this.canWrite()) { open(); } @@ -1351,8 +1353,7 @@ public List listAllFiles() throws IOException { return ret; } - @Override - public void deleteFile(String fileName) throws IOException { + private void deleteFile(String fileName) throws IOException { if (!this.canWrite()) { open(); } @@ -1370,4 +1371,12 @@ public void deleteFile(String fileName) throws IOException { } } + @Override + public List cleanUp(Predicate filter) throws IOException { + List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + for (String f : toDelete) { + this.deleteFile(f); + } + return toDelete; + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 0e42a84795c..54e457ffab6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -39,6 +39,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -622,8 +623,6 @@ protected static boolean usesStandardNamePattern(String identifier) { return m.find(); } - public abstract List listAllFiles() throws IOException; - - public abstract void deleteFile(String fileName) throws IOException; + public abstract List cleanUp(Predicate filter) throws IOException; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 5a376cb8d91..8857b054108 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -22,7 +22,10 @@ import java.util.Formatter; import java.util.List; import java.util.Properties; +import java.util.function.Predicate; import java.util.logging.Logger; +import java.util.stream.Collectors; + import javax.crypto.Mac; import javax.crypto.spec.SecretKeySpec; import org.javaswift.joss.client.factory.AccountFactory; @@ -897,8 +900,7 @@ public static String 
calculateRFC2104HMAC(String data, String key) return toHexString(mac.doFinal(data.getBytes())); } - @Override - public List listAllFiles() throws IOException { + private List listAllFiles() throws IOException { if (!this.canWrite()) { open(DataAccessOption.WRITE_ACCESS); } @@ -922,8 +924,7 @@ public List listAllFiles() throws IOException { return ret; } - @Override - public void deleteFile(String fileName) throws IOException { + private void deleteFile(String fileName) throws IOException { if (!this.canWrite()) { open(DataAccessOption.WRITE_ACCESS); } @@ -941,4 +942,13 @@ public void deleteFile(String fileName) throws IOException { fileObject.delete(); } + + @Override + public List cleanUp(Predicate filter) throws IOException { + List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + for (String f : toDelete) { + this.deleteFile(f); + } + return toDelete; + } } From 00170695b920c2f5accdeb1cdcb367b6c892ab1b Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 4 Nov 2022 18:34:28 +0100 Subject: [PATCH 348/608] better filter for files to delete --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 38017514575..901b6cf17bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2548,7 +2548,7 @@ public Response cleanStorage(@PathParam("id") String idSupplied) { } StorageIO datasetIO = DataAccess.getStorageIO(dataset); Predicate filter = f -> { - return !files.contains(f); + return files.stream().noneMatch(x -> f.startsWith(x)); }; deleted.addAll(datasetIO.cleanUp(filter)); From e16c8863b17f0d1f142725d82fe659a93fd1707e Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 7 Nov 2022 10:09:59 +0100 Subject: [PATCH 349/608] rename: postLoad -> initialize --- .../iq/dataverse/authorization/AuthenticationServiceBean.java | 2 +- .../iq/dataverse/authorization/users/AuthenticatedUser.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java index 6c401223cd5..d92ed78681b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java @@ -648,7 +648,7 @@ public AuthenticatedUser createAuthenticatedUser(UserRecordIdentifier userRecord actionLogSvc.log( new ActionLogRecord(ActionLogRecord.ActionType.Auth, "createUser") .setInfo(authenticatedUser.getIdentifier())); - authenticatedUser.postLoad(); + authenticatedUser.initialize(); return authenticatedUser; } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java index 7299350b774..9fdfce2f1a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java @@ -148,7 +148,7 @@ void prePersist() { } @PostLoad - public void postLoad() { + public void initialize() { mutedNotificationsSet = Type.tokenizeToSet(mutedNotifications); mutedEmailsSet = Type.tokenizeToSet(mutedEmails); } From 
b1d94c86197a1d2ccfd998b5ba47700af9d95c04 Mon Sep 17 00:00:00 2001
From: Eryk Kulikowski
Date: Mon, 7 Nov 2022 11:00:57 +0100
Subject: [PATCH 350/608] sortOrder column made not nullable

---
 src/main/java/edu/harvard/iq/dataverse/license/License.java    | 2 +-
 .../resources/db/migration/V5.12.0.1__8671-sorting_licenses.sql | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/license/License.java b/src/main/java/edu/harvard/iq/dataverse/license/License.java
index 0c8465e88e4..3073291a9d5 100644
--- a/src/main/java/edu/harvard/iq/dataverse/license/License.java
+++ b/src/main/java/edu/harvard/iq/dataverse/license/License.java
@@ -76,7 +76,7 @@ public class License {
     @Column(nullable = false)
     private boolean isDefault;

-    @Column(nullable = true)
+    @Column(nullable = false)
     private Long sortOrder;

     @OneToMany(mappedBy="license")
diff --git a/src/main/resources/db/migration/V5.12.0.1__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.12.0.1__8671-sorting_licenses.sql
index 43631ebd165..a449c85cf16 100644
--- a/src/main/resources/db/migration/V5.12.0.1__8671-sorting_licenses.sql
+++ b/src/main/resources/db/migration/V5.12.0.1__8671-sorting_licenses.sql
@@ -1,5 +1,5 @@
 ALTER TABLE license
-ADD COLUMN IF NOT EXISTS sortorder BIGINT;
+ADD COLUMN IF NOT EXISTS sortorder BIGINT NOT NULL DEFAULT(0);

 CREATE INDEX IF NOT EXISTS license_sortorder_id
 ON license (sortorder, id);
\ No newline at end of file
From f0ac872828d3a48cf74e74052ae1b3767afb264a Mon Sep 17 00:00:00 2001
From: Eryk Kulikowski
Date: Mon, 7 Nov 2022 13:59:16 +0100
Subject: [PATCH 351/608] updated filter: exclude export files

---
 src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
index 901b6cf17bf..1646dacd8b2 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -2548,7 +2548,7 @@ public Response cleanStorage(@PathParam("id") String idSupplied) {
         }
         StorageIO datasetIO = DataAccess.getStorageIO(dataset);
         Predicate filter = f -> {
-            return files.stream().noneMatch(x -> f.startsWith(x));
+            return f.startsWith("export_") || files.stream().noneMatch(x -> f.startsWith(x));
         };

         deleted.addAll(datasetIO.cleanUp(filter));
From bcaeb9fd58f38fdcd8cc1587e763a80c17b55048 Mon Sep 17 00:00:00 2001
From: Eryk Kulikowski
Date: Mon, 7 Nov 2022 14:05:35 +0100
Subject: [PATCH 352/608] bugfix in filter

---
 src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
index 1646dacd8b2..295a802c6a0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -2512,7 +2512,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied,
      */
     @GET
     @Path("{id}/cleanStorage")
-    public Response cleanStorage(@PathParam("id") String idSupplied) {
+    public Response cleanStorage(@PathParam("id") String idSupplied, @QueryParam("dryrun") Boolean dryrun) {
         // get user and dataset
         User authUser;
         try {
@@ -2548,7 +2548,7 @@ public Response cleanStorage(@PathParam("id") String idSupplied, @QueryParam("dr
         StorageIO datasetIO = DataAccess.getStorageIO(dataset);
         Predicate filter = f -> {
-            return
f.startsWith("export_") || files.stream().noneMatch(x -> f.startsWith(x)); + return !f.startsWith("export_") || files.stream().noneMatch(x -> f.startsWith(x)); }; deleted.addAll(datasetIO.cleanUp(filter)); From 503b9a36ca409e4bf89659cc877b62415d4ef33a Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 7 Nov 2022 14:06:35 +0100 Subject: [PATCH 353/608] added dryrun query parameter --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 295a802c6a0..0ce3a4043a8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2551,7 +2551,11 @@ public Response cleanStorage(@PathParam("id") String idSupplied, @QueryParam("dr return !f.startsWith("export_") || files.stream().noneMatch(x -> f.startsWith(x)); }; - deleted.addAll(datasetIO.cleanUp(filter)); + if (dryrun != null && dryrun.booleanValue()) { + deleted.addAll(files.stream().filter(filter).collect(Collectors.toList())); + } else { + deleted.addAll(datasetIO.cleanUp(filter)); + } } catch (IOException ex) { logger.log(Level.SEVERE, null, ex); return error(Response.Status.INTERNAL_SERVER_ERROR, "IOException! Serious Error! See administrator!"); From 4b0d36596835333bf5c528e3659b8a5bbef5ed60 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 7 Nov 2022 14:10:25 -0500 Subject: [PATCH 354/608] rename getEditVersion to getOrCreateEditVersion #8930 --- .../edu/harvard/iq/dataverse/Dataset.java | 16 ++++++++------ .../edu/harvard/iq/dataverse/DatasetPage.java | 12 +++++----- .../iq/dataverse/EditDatafilesPage.java | 8 +++---- .../edu/harvard/iq/dataverse/FilePage.java | 20 ++++++++--------- .../edu/harvard/iq/dataverse/api/Access.java | 2 +- .../harvard/iq/dataverse/api/Datasets.java | 20 ++++++++--------- .../edu/harvard/iq/dataverse/api/Files.java | 2 +- .../CollectionDepositManagerImpl.java | 2 +- .../api/datadeposit/ContainerManagerImpl.java | 2 +- .../datadeposit/MediaResourceManagerImpl.java | 4 ++-- .../filesystem/FileRecordJobListener.java | 2 +- .../datasetutility/AddReplaceFileHelper.java | 4 ++-- .../command/impl/CreateNewDatasetCommand.java | 2 +- .../CuratePublishedDatasetVersionCommand.java | 12 +++++----- .../impl/GetDraftDatasetVersionCommand.java | 2 +- .../impl/PersistProvFreeFormCommand.java | 2 +- .../command/impl/RestrictFileCommand.java | 2 +- .../impl/ReturnDatasetToAuthorCommand.java | 4 ++-- .../impl/SetCurationStatusCommand.java | 2 +- .../impl/SubmitDatasetForReviewCommand.java | 2 +- .../impl/UpdateDatasetVersionCommand.java | 22 +++++++++---------- .../impl/CreateDatasetVersionCommandTest.java | 4 ++-- .../command/impl/RestrictFileCommandTest.java | 4 ++-- .../iq/dataverse/ingest/IngestUtilTest.java | 10 ++++----- .../iq/dataverse/util/FileUtilTest.java | 2 +- 25 files changed, 83 insertions(+), 81 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index a4f82d41bac..e91221ce36c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -391,19 +391,21 @@ private DatasetVersion createNewDatasetVersion(Template template, FileMetadata f /** * The "edit version" is the most recent *draft* of a dataset, and if the - * latest version of a dataset is published, a new draft will be created. 
-     *
+     * latest version of a dataset is published, a new draft will be created. If
+     * you don't want to create a new version, you should be using
+     * getLatestVersion.
+     *
      * @return The edit version {@code this}.
      */
-    public DatasetVersion getEditVersion() {
-        return getEditVersion(null, null);
+    public DatasetVersion getOrCreateEditVersion() {
+        return getOrCreateEditVersion(null, null);
     }

-    public DatasetVersion getEditVersion(FileMetadata fm) {
-        return getEditVersion(null, fm);
+    public DatasetVersion getOrCreateEditVersion(FileMetadata fm) {
+        return getOrCreateEditVersion(null, fm);
     }

-    public DatasetVersion getEditVersion(Template template, FileMetadata fm) {
+    public DatasetVersion getOrCreateEditVersion(Template template, FileMetadata fm) {
         DatasetVersion latestVersion = this.getLatestVersion();
         if (!latestVersion.isWorkingCopy() || template != null) {
             // if the latest version is released or archived, create a new version for editing
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
index 0a8db69bf5b..6e71f6c5042 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
@@ -2067,7 +2067,7 @@ private String init(boolean initFull) {
                     }
                     //Initialize with the default if there is one
                     dataset.setTemplate(selectedTemplate);
-                    workingVersion = dataset.getEditVersion(selectedTemplate, null);
+                    workingVersion = dataset.getOrCreateEditVersion(selectedTemplate, null);
                     updateDatasetFieldInputLevels();
                 } else {
                     workingVersion = dataset.getCreateVersion(licenseServiceBean.getDefault());
@@ -2401,7 +2401,7 @@ private void resetVersionUI() {
         AuthenticatedUser au = (AuthenticatedUser) session.getUser();
         //On create set pre-populated fields
-        for (DatasetField dsf : dataset.getEditVersion().getDatasetFields()) {
+        for (DatasetField dsf : dataset.getOrCreateEditVersion().getDatasetFields()) {
             if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.depositor) && dsf.isEmpty()) {
                 dsf.getDatasetFieldValues().get(0).setValue(au.getLastName() + ", " + au.getFirstName());
             }
@@ -2458,7 +2458,7 @@ private void refreshSelectedFiles(List filesToRefresh){
         }
         String termsOfAccess = workingVersion.getTermsOfUseAndAccess().getTermsOfAccess();
         boolean requestAccess = workingVersion.getTermsOfUseAndAccess().isFileAccessRequest();
-        workingVersion = dataset.getEditVersion();
+        workingVersion = dataset.getOrCreateEditVersion();
         workingVersion.getTermsOfUseAndAccess().setTermsOfAccess(termsOfAccess);
         workingVersion.getTermsOfUseAndAccess().setFileAccessRequest(requestAccess);
         List newSelectedFiles = new ArrayList<>();
@@ -2521,7 +2521,7 @@ public void edit(EditMode editMode) {
         if (this.readOnly) {
             dataset = datasetService.find(dataset.getId());
         }
-        workingVersion = dataset.getEditVersion();
+        workingVersion = dataset.getOrCreateEditVersion();
         clone = workingVersion.cloneDatasetVersion();
         if (editMode.equals(EditMode.METADATA)) {
             datasetVersionUI = datasetVersionUI.initDatasetVersionUI(workingVersion, true);
@@ -3452,7 +3452,7 @@ private void deleteFiles(List filesToDelete) {
                 if (markedForDelete.getId() != null) {
                     // This FileMetadata has an id, i.e., it exists in the database.
// We are going to remove this filemetadata from the version:
-                    dataset.getEditVersion().getFileMetadatas().remove(markedForDelete);
+                    dataset.getOrCreateEditVersion().getFileMetadatas().remove(markedForDelete);
                     // But the actual delete will be handled inside the UpdateDatasetCommand
                     // (called later on). The list "filesToBeDeleted" is passed to the
                     // command as a parameter:
@@ -3678,7 +3678,7 @@ public String save() {
             // have been created in the dataset.
             dataset = datasetService.find(dataset.getId());

-            List filesAdded = ingestService.saveAndAddFilesToDataset(dataset.getEditVersion(), newFiles, null, true);
+            List filesAdded = ingestService.saveAndAddFilesToDataset(dataset.getOrCreateEditVersion(), newFiles, null, true);
             newFiles.clear();

             // and another update command:
diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
index 6cf294ffd6d..fc8df8681af 100644
--- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
@@ -539,7 +539,7 @@ public String init() {
             return permissionsWrapper.notFound();
         }

-        workingVersion = dataset.getEditVersion();
+        workingVersion = dataset.getOrCreateEditVersion();
         //TODO: review if we need this check;
         // as getEditVersion should either return the existing draft or create a new one
@@ -890,7 +890,7 @@ private void deleteFiles(List filesForDelete) {
                 // ToDo - FileMetadataUtil.removeFileMetadataFromList should handle these two
                 // removes so they could be put after this if clause and the else clause could
                 // be removed.
-                dataset.getEditVersion().getFileMetadatas().remove(markedForDelete);
+                dataset.getOrCreateEditVersion().getFileMetadatas().remove(markedForDelete);
                 fileMetadatas.remove(markedForDelete);
                 filesToBeDeleted.add(markedForDelete);

@@ -907,7 +907,7 @@ private void deleteFiles(List filesForDelete) {
                 // 1. delete the filemetadata from the local display list:
                 FileMetadataUtil.removeFileMetadataFromList(fileMetadatas, markedForDelete);
                 // 2. delete the filemetadata from the version:
-                FileMetadataUtil.removeFileMetadataFromList(dataset.getEditVersion().getFileMetadatas(), markedForDelete);
+                FileMetadataUtil.removeFileMetadataFromList(dataset.getOrCreateEditVersion().getFileMetadatas(), markedForDelete);
             }

             if (markedForDelete.getDataFile().getId() == null) {
@@ -1201,7 +1201,7 @@ public String save() {
                  */
         }

-        workingVersion = dataset.getEditVersion();
+        workingVersion = dataset.getOrCreateEditVersion();
         logger.fine("working version id: " + workingVersion.getId());

         if (FileEditMode.EDIT == mode && Referrer.FILE == referrer) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java
index 7f2c6dfca5c..85eb79d2ddc 100644
--- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java
@@ -365,7 +365,7 @@ public String saveProvFreeform(String freeformTextInput, DataFile dataFileFromPo
         file.setProvEntityName(dataFileFromPopup.getProvEntityName()); //passing this value into the file being saved here is pretty hacky.
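         // Find this file's metadata in the draft (edit) version, since that is the
         // version the free-form provenance is persisted against: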
Command cmd;
-        for (FileMetadata fmw : editDataset.getEditVersion().getFileMetadatas()) {
+        for (FileMetadata fmw : editDataset.getOrCreateEditVersion().getFileMetadatas()) {
             if (fmw.getDataFile().equals(this.fileMetadata.getDataFile())) {
                 cmd = new PersistProvFreeFormCommand(dvRequestService.getDataverseRequest(), file, freeformTextInput);
                 commandEngine.submit(cmd);
@@ -381,15 +381,15 @@ public String restrictFile(boolean restricted) throws CommandException{
         String fileNames = null;
         editDataset = this.file.getOwner();
         if (restricted) { // get values from access popup
-            editDataset.getEditVersion().getTermsOfUseAndAccess().setTermsOfAccess(termsOfAccess);
-            editDataset.getEditVersion().getTermsOfUseAndAccess().setFileAccessRequest(fileAccessRequest);
+            editDataset.getOrCreateEditVersion().getTermsOfUseAndAccess().setTermsOfAccess(termsOfAccess);
+            editDataset.getOrCreateEditVersion().getTermsOfUseAndAccess().setFileAccessRequest(fileAccessRequest);
         }
         //using this method to update the terms for datasets that are out of compliance
         // with Terms of Access requirement - may get here with a file that is already restricted
         // we'll allow it
         try {
             Command cmd;
-            for (FileMetadata fmw : editDataset.getEditVersion().getFileMetadatas()) {
+            for (FileMetadata fmw : editDataset.getOrCreateEditVersion().getFileMetadatas()) {
                 if (fmw.getDataFile().equals(this.fileMetadata.getDataFile())) {
                     fileNames += fmw.getLabel();
                     cmd = new RestrictFileCommand(fmw.getDataFile(), dvRequestService.getDataverseRequest(), restricted);
@@ -424,7 +424,7 @@ public String deleteFile() {

         FileMetadata markedForDelete = null;

-        for (FileMetadata fmd : editDataset.getEditVersion().getFileMetadatas()) {
+        for (FileMetadata fmd : editDataset.getOrCreateEditVersion().getFileMetadatas()) {

             if (fmd.getDataFile().getId().equals(fileId)) {
                 markedForDelete = fmd;
@@ -435,17 +435,17 @@ public String deleteFile() {
             // the file already exists as part of this dataset
             // so all we remove is the file from the fileMetadatas (for display)
             // and let the delete be handled in the command (by adding it to the filesToBeDeleted list
-            editDataset.getEditVersion().getFileMetadatas().remove(markedForDelete);
+            editDataset.getOrCreateEditVersion().getFileMetadatas().remove(markedForDelete);
             filesToBeDeleted.add(markedForDelete);
         } else {
             List filesToKeep = new ArrayList<>();
-            for (FileMetadata fmo : editDataset.getEditVersion().getFileMetadatas()) {
+            for (FileMetadata fmo : editDataset.getOrCreateEditVersion().getFileMetadatas()) {
                 if (!fmo.getDataFile().getId().equals(this.getFile().getId())) {
                     filesToKeep.add(fmo);
                 }
             }
-            editDataset.getEditVersion().setFileMetadatas(filesToKeep);
+            editDataset.getOrCreateEditVersion().setFileMetadatas(filesToKeep);
         }

         fileDeleteInProgress = true;
@@ -612,7 +612,7 @@ public void setTermsMet(boolean termsMet) {
     public String save() {

         // Validate
-        Set constraintViolations = editDataset.getEditVersion().validate();
+        Set constraintViolations = editDataset.getOrCreateEditVersion().validate();
         if (!constraintViolations.isEmpty()) {
             //JsfHelper.addFlashMessage(JH.localize("dataset.message.validationError"));
             fileDeleteInProgress = false;
@@ -629,7 +629,7 @@ public String save() {

         if (!filesToBeDeleted.isEmpty()) {
             // We want to delete the file (there's always only one file with this page)
-            editDataset.getEditVersion().getFileMetadatas().remove(filesToBeDeleted.get(0));
+            editDataset.getOrCreateEditVersion().getFileMetadatas().remove(filesToBeDeleted.get(0));
             deleteFileId = filesToBeDeleted.get(0).getDataFile().getId();
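             // Also resolve the physical storage location now, while the DataFile
             // record is still available, so the actual bytes can be cleaned up
             // once the deletion has gone through: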
deleteStorageLocation = datafileService.getPhysicalFileToDelete(filesToBeDeleted.get(0).getDataFile()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index abeedf23b59..75aa57a0d2b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -1384,7 +1384,7 @@ public Response allowAccessRequest(@PathParam("id") String datasetToAllowAccessI return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.fileAccess.failure.noUser", args)); } - dataset.getEditVersion().getTermsOfUseAndAccess().setFileAccessRequest(allowRequest); + dataset.getOrCreateEditVersion().getTermsOfUseAndAccess().setFileAccessRequest(allowRequest); try { engineSvc.submit(new UpdateDatasetVersionCommand(dataset, dataverseRequest)); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index aff543e643c..59bf81a4b8d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -630,7 +630,7 @@ public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, DatasetVersion managedVersion; if (updateDraft) { - final DatasetVersion editVersion = ds.getEditVersion(); + final DatasetVersion editVersion = ds.getOrCreateEditVersion(); editVersion.setDatasetFields(incomingVersion.getDatasetFields()); editVersion.setTermsOfUseAndAccess(incomingVersion.getTermsOfUseAndAccess()); editVersion.getTermsOfUseAndAccess().setDatasetVersion(editVersion); @@ -639,7 +639,7 @@ public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, return error(Status.CONFLICT, BundleUtil.getStringFromBundle("dataset.message.toua.invalid")); } Dataset managedDataset = execCommand(new UpdateDatasetVersionCommand(ds, req)); - managedVersion = managedDataset.getEditVersion(); + managedVersion = managedDataset.getOrCreateEditVersion(); } else { boolean hasValidTerms = TermsOfUseAndAccessValidator.isTOUAValid(incomingVersion.getTermsOfUseAndAccess(), null); if (!hasValidTerms) { @@ -698,7 +698,7 @@ public Response updateVersionMetadata(String jsonLDBody, @PathParam("id") String try { Dataset ds = findDatasetOrDie(id); DataverseRequest req = createDataverseRequest(findUserOrDie()); - DatasetVersion dsv = ds.getEditVersion(); + DatasetVersion dsv = ds.getOrCreateEditVersion(); boolean updateDraft = ds.getLatestVersion().isDraft(); dsv = JSONLDUtil.updateDatasetVersionMDFromJsonLD(dsv, jsonLDBody, metadataBlockService, datasetFieldSvc, !replaceTerms, false, licenseSvc); dsv.getTermsOfUseAndAccess().setDatasetVersion(dsv); @@ -709,7 +709,7 @@ public Response updateVersionMetadata(String jsonLDBody, @PathParam("id") String DatasetVersion managedVersion; if (updateDraft) { Dataset managedDataset = execCommand(new UpdateDatasetVersionCommand(ds, req)); - managedVersion = managedDataset.getEditVersion(); + managedVersion = managedDataset.getOrCreateEditVersion(); } else { managedVersion = execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); } @@ -731,14 +731,14 @@ public Response deleteMetadata(String jsonLDBody, @PathParam("id") String id) { try { Dataset ds = findDatasetOrDie(id); DataverseRequest req = createDataverseRequest(findUserOrDie()); - DatasetVersion dsv = ds.getEditVersion(); + DatasetVersion dsv = ds.getOrCreateEditVersion(); boolean updateDraft = ds.getLatestVersion().isDraft(); dsv = 
JSONLDUtil.deleteDatasetVersionMDFromJsonLD(dsv, jsonLDBody, metadataBlockService, licenseSvc); dsv.getTermsOfUseAndAccess().setDatasetVersion(dsv); DatasetVersion managedVersion; if (updateDraft) { Dataset managedDataset = execCommand(new UpdateDatasetVersionCommand(ds, req)); - managedVersion = managedDataset.getEditVersion(); + managedVersion = managedDataset.getOrCreateEditVersion(); } else { managedVersion = execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); } @@ -769,7 +769,7 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav Dataset ds = findDatasetOrDie(id); JsonObject json = Json.createReader(rdr).readObject(); - DatasetVersion dsv = ds.getEditVersion(); + DatasetVersion dsv = ds.getOrCreateEditVersion(); dsv.getTermsOfUseAndAccess().setDatasetVersion(dsv); List fields = new LinkedList<>(); DatasetField singleField = null; @@ -882,7 +882,7 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav boolean updateDraft = ds.getLatestVersion().isDraft(); DatasetVersion managedVersion = updateDraft - ? execCommand(new UpdateDatasetVersionCommand(ds, req)).getEditVersion() + ? execCommand(new UpdateDatasetVersionCommand(ds, req)).getOrCreateEditVersion() : execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); return ok(json(managedVersion)); @@ -932,7 +932,7 @@ private Response processDatasetUpdate(String jsonBody, String id, DataverseReque Dataset ds = findDatasetOrDie(id); JsonObject json = Json.createReader(rdr).readObject(); - DatasetVersion dsv = ds.getEditVersion(); + DatasetVersion dsv = ds.getOrCreateEditVersion(); dsv.getTermsOfUseAndAccess().setDatasetVersion(dsv); List fields = new LinkedList<>(); DatasetField singleField = null; @@ -1037,7 +1037,7 @@ private Response processDatasetUpdate(String jsonBody, String id, DataverseReque DatasetVersion managedVersion; if (updateDraft) { - managedVersion = execCommand(new UpdateDatasetVersionCommand(ds, req)).getEditVersion(); + managedVersion = execCommand(new UpdateDatasetVersionCommand(ds, req)).getOrCreateEditVersion(); } else { managedVersion = execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 9dc0c3be524..4cf27064290 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -388,7 +388,7 @@ public Response updateFileMetadata(@FormDataParam("jsonData") String jsonData, } try { - DatasetVersion editVersion = df.getOwner().getEditVersion(); + DatasetVersion editVersion = df.getOwner().getOrCreateEditVersion(); //We get the new fileMetadata from the new version //This is because after generating the draft with getEditVersion, diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java index b6d75276ae1..6543d771ebe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java @@ -110,7 +110,7 @@ public DepositReceipt createNew(String collectionUri, Deposit deposit, AuthCrede throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "user " + user.getDisplayInfo().getTitle() + " is not authorized to create a dataset in this dataverse."); } - DatasetVersion newDatasetVersion = 
dataset.getEditVersion(); + DatasetVersion newDatasetVersion = dataset.getOrCreateEditVersion(); String foreignFormat = SwordUtil.DCTERMS; try { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/ContainerManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/ContainerManagerImpl.java index dc178a9a740..8fb55a8eaf6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/ContainerManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/ContainerManagerImpl.java @@ -137,7 +137,7 @@ public DepositReceipt replaceMetadata(String uri, Deposit deposit, AuthCredentia if (!permissionService.isUserAllowedOn(user, updateDatasetCommand, dataset)) { throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "User " + user.getDisplayInfo().getTitle() + " is not authorized to modify dataverse " + dvThatOwnsDataset.getAlias()); } - DatasetVersion datasetVersion = dataset.getEditVersion(); + DatasetVersion datasetVersion = dataset.getOrCreateEditVersion(); // erase all metadata before creating populating dataset version List emptyDatasetFields = new ArrayList<>(); datasetVersion.setDatasetFields(emptyDatasetFields); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java index 928ffd4a129..5491024c73c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java @@ -250,7 +250,7 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au // Make sure that the upload type is not rsync - handled above for dual mode // ------------------------------------- - if (dataset.getEditVersion().isHasPackageFile()) { + if (dataset.getOrCreateEditVersion().isHasPackageFile()) { throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile")); } @@ -276,7 +276,7 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au } String uploadedZipFilename = deposit.getFilename(); - DatasetVersion editVersion = dataset.getEditVersion(); + DatasetVersion editVersion = dataset.getOrCreateEditVersion(); if (deposit.getInputStream() == null) { throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Deposit input stream was null."); diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index 6b82a665c17..3ae8ce9b883 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -190,7 +190,7 @@ public void beforeJob() throws Exception { // if mode = REPLACE, remove all filemetadata from the dataset version and start fresh if (mode.equalsIgnoreCase(ImportMode.REPLACE.name())) { try { - DatasetVersion workingVersion = dataset.getEditVersion(); + DatasetVersion workingVersion = dataset.getOrCreateEditVersion(); List fileMetadataList = workingVersion.getFileMetadatas(); jobLogger.log(Level.INFO, "Removing any existing file metadata since mode = REPLACE"); for (FileMetadata fmd : fileMetadataList) { diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java 
b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 8e7922fd83b..febbb249a91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -1200,7 +1200,7 @@ private boolean step_030_createNewFilesViaIngest(){ } // Load the working version of the Dataset - workingVersion = dataset.getEditVersion(); + workingVersion = dataset.getOrCreateEditVersion(); clone = workingVersion.cloneDatasetVersion(); try { CreateDataFileResult result = FileUtil.createDataFiles(workingVersion, @@ -1805,7 +1805,7 @@ private void setNewlyAddedFiles(List datafiles){ newlyAddedFileMetadatas = new ArrayList<>(); // Loop of uglinesss...but expect 1 to 4 files in final file list - List latestFileMetadatas = dataset.getEditVersion().getFileMetadatas(); + List latestFileMetadatas = dataset.getOrCreateEditVersion().getFileMetadatas(); for (DataFile newlyAddedFile : finalFileList){ diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java index 534e07feaae..1efaf14c755 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java @@ -81,7 +81,7 @@ protected void additionalParameterTests(CommandContext ctxt) throws CommandExcep @Override protected DatasetVersion getVersionToPersist( Dataset theDataset ) { - return theDataset.getEditVersion(); + return theDataset.getOrCreateEditVersion(); } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java index 772b6205b02..66ba00bcf55 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java @@ -56,7 +56,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { DatasetVersion updateVersion = getDataset().getLatestVersionForCopy(); // Copy metadata from draft version to latest published version - updateVersion.setDatasetFields(getDataset().getEditVersion().initDatasetFields()); + updateVersion.setDatasetFields(getDataset().getOrCreateEditVersion().initDatasetFields()); validateOrDie(updateVersion, isValidateLenient()); @@ -68,14 +68,14 @@ public Dataset execute(CommandContext ctxt) throws CommandException { TermsOfUseAndAccess oldTerms = updateVersion.getTermsOfUseAndAccess(); - TermsOfUseAndAccess newTerms = getDataset().getEditVersion().getTermsOfUseAndAccess(); + TermsOfUseAndAccess newTerms = getDataset().getOrCreateEditVersion().getTermsOfUseAndAccess(); newTerms.setDatasetVersion(updateVersion); updateVersion.setTermsOfUseAndAccess(newTerms); //Put old terms on version that will be deleted.... 
- getDataset().getEditVersion().setTermsOfUseAndAccess(oldTerms); + getDataset().getOrCreateEditVersion().setTermsOfUseAndAccess(oldTerms); //Also set the fileaccessrequest boolean on the dataset to match the new terms getDataset().setFileAccessRequest(updateVersion.getTermsOfUseAndAccess().isFileAccessRequest()); - List newComments = getDataset().getEditVersion().getWorkflowComments(); + List newComments = getDataset().getOrCreateEditVersion().getWorkflowComments(); if (newComments!=null && newComments.size() >0) { for(WorkflowComment wfc: newComments) { wfc.setDatasetVersion(updateVersion); @@ -91,7 +91,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // Look for file metadata changes and update published metadata if needed List pubFmds = updateVersion.getFileMetadatas(); int pubFileCount = pubFmds.size(); - int newFileCount = tempDataset.getEditVersion().getFileMetadatas().size(); + int newFileCount = tempDataset.getOrCreateEditVersion().getFileMetadatas().size(); /* The policy for this command is that it should only be used when the change is a 'minor update' with no file changes. * Nominally we could call .isMinorUpdate() for that but we're making the same checks as we go through the update here. */ @@ -131,7 +131,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { ctxt.em().remove(mergedFmd); // including removing metadata from the list on the datafile draftFmd.getDataFile().getFileMetadatas().remove(draftFmd); - tempDataset.getEditVersion().getFileMetadatas().remove(draftFmd); + tempDataset.getOrCreateEditVersion().getFileMetadatas().remove(draftFmd); // And any references in the list held by categories for (DataFileCategory cat : tempDataset.getCategories()) { cat.getFileMetadatas().remove(draftFmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftDatasetVersionCommand.java index 88b5a75ea22..7e32b19e576 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftDatasetVersionCommand.java @@ -24,7 +24,7 @@ public GetDraftDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffect @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - return ds.getEditVersion(); + return ds.getOrCreateEditVersion(); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PersistProvFreeFormCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PersistProvFreeFormCommand.java index aa06967675f..a258c36d6ea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PersistProvFreeFormCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PersistProvFreeFormCommand.java @@ -36,7 +36,7 @@ public DataFile execute(CommandContext ctxt) throws CommandException { } else { Dataset dataset = dataFile.getOwner(); - DatasetVersion workingVersion = dataset.getEditVersion(); + DatasetVersion workingVersion = dataset.getOrCreateEditVersion(); if (workingVersion.isDraft()) { if (dataset.isReleased()){ diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RestrictFileCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RestrictFileCommand.java index 16fa40cd8a7..38cbeaf3d66 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RestrictFileCommand.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RestrictFileCommand.java @@ -63,7 +63,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { } else { Dataset dataset = file.getOwner(); - DatasetVersion workingVersion = dataset.getEditVersion(); + DatasetVersion workingVersion = dataset.getOrCreateEditVersion(); // We need the FileMetadata for the file in the draft dataset version and the // file we have may still reference the fmd from the prior released version FileMetadata draftFmd = file.getFileMetadata(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java index 169f6d790d3..ba0348f57d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java @@ -37,11 +37,11 @@ public Dataset execute(CommandContext ctxt) throws CommandException { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.reject.datasetNotInReview"), this); } - dataset.getEditVersion().setLastUpdateTime(getTimestamp()); + dataset.getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); dataset.setModificationTime(getTimestamp()); ctxt.engine().submit( new RemoveLockCommand(getRequest(), getDataset(), DatasetLock.Reason.InReview) ); - WorkflowComment workflowComment = new WorkflowComment(dataset.getEditVersion(), WorkflowComment.Type.RETURN_TO_AUTHOR, comment, (AuthenticatedUser) this.getUser()); + WorkflowComment workflowComment = new WorkflowComment(dataset.getOrCreateEditVersion(), WorkflowComment.Type.RETURN_TO_AUTHOR, comment, (AuthenticatedUser) this.getUser()); ctxt.datasets().addWorkflowComment(workflowComment); updateDatasetUser(ctxt); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java index c3a62a35bb3..72f0ef335fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java @@ -77,7 +77,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { public Dataset save(CommandContext ctxt) throws CommandException { - getDataset().getEditVersion().setLastUpdateTime(getTimestamp()); + getDataset().getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); getDataset().setModificationTime(getTimestamp()); Dataset savedDataset = ctxt.em().merge(getDataset()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java index e38f5bae8e0..130030798ab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java @@ -51,7 +51,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { private Dataset save(CommandContext ctxt) throws CommandException { - getDataset().getEditVersion().setLastUpdateTime(getTimestamp()); + getDataset().getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); getDataset().setModificationTime(getTimestamp()); Dataset savedDataset = 
ctxt.em().merge(getDataset()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 227c54c598f..33f64f23076 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -64,7 +64,7 @@ public UpdateDatasetVersionCommand(Dataset theDataset, DataverseRequest aRequest this.filesToDelete = new ArrayList<>(); this.clone = null; this.fmVarMet = null; - for (FileMetadata fmd : theDataset.getEditVersion().getFileMetadatas()) { + for (FileMetadata fmd : theDataset.getOrCreateEditVersion().getFileMetadatas()) { if (fmd.getDataFile().equals(fileToDelete)) { filesToDelete.add(fmd); break; @@ -114,10 +114,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", getDataset().getId()); } - getDataset().getEditVersion(fmVarMet).setDatasetFields(getDataset().getEditVersion(fmVarMet).initDatasetFields()); - validateOrDie(getDataset().getEditVersion(fmVarMet), isValidateLenient()); + getDataset().getOrCreateEditVersion(fmVarMet).setDatasetFields(getDataset().getOrCreateEditVersion(fmVarMet).initDatasetFields()); + validateOrDie(getDataset().getOrCreateEditVersion(fmVarMet), isValidateLenient()); - final DatasetVersion editVersion = getDataset().getEditVersion(fmVarMet); + final DatasetVersion editVersion = getDataset().getOrCreateEditVersion(fmVarMet); DatasetFieldUtil.tidyUpFields(editVersion.getDatasetFields(), true); @@ -204,10 +204,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // If the datasetversion doesn't match, we have the fmd from a published version // and we need to remove the one for the newly created draft instead, so we find // it here - logger.fine("Edit ver: " + theDataset.getEditVersion().getId()); + logger.fine("Edit ver: " + theDataset.getOrCreateEditVersion().getId()); logger.fine("fmd ver: " + fmd.getDatasetVersion().getId()); - if (!theDataset.getEditVersion().equals(fmd.getDatasetVersion())) { - fmd = FileMetadataUtil.getFmdForFileInEditVersion(fmd, theDataset.getEditVersion()); + if (!theDataset.getOrCreateEditVersion().equals(fmd.getDatasetVersion())) { + fmd = FileMetadataUtil.getFmdForFileInEditVersion(fmd, theDataset.getOrCreateEditVersion()); } } fmd = ctxt.em().merge(fmd); @@ -229,21 +229,21 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // In either case, to fully remove the fmd, we have to remove any other possible // references // From the datasetversion - FileMetadataUtil.removeFileMetadataFromList(theDataset.getEditVersion().getFileMetadatas(), fmd); + FileMetadataUtil.removeFileMetadataFromList(theDataset.getOrCreateEditVersion().getFileMetadatas(), fmd); // and from the list associated with each category for (DataFileCategory cat : theDataset.getCategories()) { FileMetadataUtil.removeFileMetadataFromList(cat.getFileMetadatas(), fmd); } } - for(FileMetadata fmd: theDataset.getEditVersion().getFileMetadatas()) { + for(FileMetadata fmd: theDataset.getOrCreateEditVersion().getFileMetadatas()) { logger.fine("FMD: " + fmd.getId() + " for file: " + fmd.getDataFile().getId() + "is in final draft version"); } if (recalculateUNF) { - ctxt.ingest().recalculateDatasetVersionUNF(theDataset.getEditVersion()); + 
ctxt.ingest().recalculateDatasetVersionUNF(theDataset.getOrCreateEditVersion()); } - theDataset.getEditVersion().setLastUpdateTime(getTimestamp()); + theDataset.getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); theDataset.setModificationTime(getTimestamp()); savedDataset = ctxt.em().merge(theDataset); diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommandTest.java index 30c5048fa8f..dd8901a05dc 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommandTest.java @@ -33,7 +33,7 @@ public void testSimpleVersionAddition() throws Exception { Dataset ds = makeDataset(); // Populate the Initial version - DatasetVersion dsvInitial = ds.getEditVersion(); + DatasetVersion dsvInitial = ds.getOrCreateEditVersion(); dsvInitial.setCreateTime( dateFmt.parse("20001012") ); dsvInitial.setLastUpdateTime( dsvInitial.getLastUpdateTime() ); dsvInitial.setId( MocksFactory.nextId() ); @@ -62,7 +62,7 @@ public void testSimpleVersionAddition() throws Exception { assertEquals( dsvCreationDate, dsvNew.getLastUpdateTime() ); assertEquals( dsvCreationDate.getTime(), ds.getModificationTime().getTime() ); assertEquals( ds, dsvNew.getDataset() ); - assertEquals( dsvNew, ds.getEditVersion() ); + assertEquals( dsvNew, ds.getOrCreateEditVersion() ); Map> expected = new HashMap<>(); expected.put(ds, Collections.singleton(Permission.AddDataset)); assertEquals(expected, testEngine.getReqiredPermissionsForObjects() ); diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/RestrictFileCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/RestrictFileCommandTest.java index 1e8b8fb3106..7b663389a3a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/RestrictFileCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/RestrictFileCommandTest.java @@ -108,7 +108,7 @@ public void testRestrictPublishedFile() throws Exception{ //asserts assertTrue(!file.isRestricted()); boolean fileFound = false; - for (FileMetadata fmw : dataset.getEditVersion().getFileMetadatas()) { + for (FileMetadata fmw : dataset.getOrCreateEditVersion().getFileMetadatas()) { if (file.equals(fmw.getDataFile())) { fileFound=true; //If it worked fmw is for the draft version and file.getFileMetadata() is for the published version @@ -193,7 +193,7 @@ public void testUnrestrictPublishedFile() throws Exception{ //asserts assertTrue(file.isRestricted()); boolean fileFound = false; - for (FileMetadata fmw : dataset.getEditVersion().getFileMetadatas()) { + for (FileMetadata fmw : dataset.getOrCreateEditVersion().getFileMetadatas()) { if (file.equals(fmw.getDataFile())) { fileFound = true; assertTrue(!fmw.isRestricted()); diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/IngestUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/IngestUtilTest.java index 8e4b81ec921..ca68af4090c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/IngestUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/IngestUtilTest.java @@ -42,7 +42,7 @@ public void testCheckForDuplicateFileNamesNoDirectories() throws Exception { Dataset dataset = makeDataset(); // create dataset version - DatasetVersion datasetVersion = dataset.getEditVersion(); + DatasetVersion datasetVersion = 
dataset.getOrCreateEditVersion(); datasetVersion.setCreateTime(dateFmt.parse("20001012")); datasetVersion.setLastUpdateTime(datasetVersion.getLastUpdateTime()); datasetVersion.setId(MocksFactory.nextId()); @@ -146,7 +146,7 @@ public void testCheckForDuplicateFileNamesWithEmptyDirectoryLabels() throws Exce Dataset dataset = makeDataset(); // create dataset version - DatasetVersion datasetVersion = dataset.getEditVersion(); + DatasetVersion datasetVersion = dataset.getOrCreateEditVersion(); datasetVersion.setCreateTime(dateFmt.parse("20001012")); datasetVersion.setLastUpdateTime(datasetVersion.getLastUpdateTime()); datasetVersion.setId(MocksFactory.nextId()); @@ -251,7 +251,7 @@ public void testCheckForDuplicateFileNamesWithDirectories() throws Exception { Dataset dataset = makeDataset(); // create dataset version - DatasetVersion datasetVersion = dataset.getEditVersion(); + DatasetVersion datasetVersion = dataset.getOrCreateEditVersion(); datasetVersion.setCreateTime(dateFmt.parse("20001012")); datasetVersion.setLastUpdateTime(datasetVersion.getLastUpdateTime()); datasetVersion.setId(MocksFactory.nextId()); @@ -389,7 +389,7 @@ public void testCheckForDuplicateFileNamesTabular() throws Exception { Dataset dataset = makeDataset(); // create dataset version - DatasetVersion datasetVersion = dataset.getEditVersion(); + DatasetVersion datasetVersion = dataset.getOrCreateEditVersion(); datasetVersion.setCreateTime(dateFmt.parse("20001012")); datasetVersion.setLastUpdateTime(datasetVersion.getLastUpdateTime()); datasetVersion.setId(MocksFactory.nextId()); @@ -475,7 +475,7 @@ public void testCheckForDuplicateFileNamesWhenReplacing() throws Exception { Dataset dataset = makeDataset(); // create dataset version - DatasetVersion datasetVersion = dataset.getEditVersion(); + DatasetVersion datasetVersion = dataset.getOrCreateEditVersion(); datasetVersion.setCreateTime(dateFmt.parse("20001012")); datasetVersion.setLastUpdateTime(datasetVersion.getLastUpdateTime()); datasetVersion.setId(MocksFactory.nextId()); diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java index 01fb8aad6cf..f3d9d5eda46 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java @@ -128,7 +128,7 @@ public void testIsDownloadPopupRequiredNull() { @Test public void testIsDownloadPopupRequiredDraft() { Dataset dataset = new Dataset(); - DatasetVersion dsv1 = dataset.getEditVersion(); + DatasetVersion dsv1 = dataset.getOrCreateEditVersion(); assertEquals(DatasetVersion.VersionState.DRAFT, dsv1.getVersionState()); assertEquals(false, FileUtil.isDownloadPopupRequired(dsv1)); } From 81254adc4b3a2d0f9584b083143c2a4ec17fe980 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 8 Nov 2022 15:39:03 +0100 Subject: [PATCH 355/608] bugfix in test: licenseId was not parsed correctly --- src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java index 50d3c5b34ea..30fc603a998 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java @@ -92,7 +92,7 @@ public void testLicenses(){ body = getLicensesResponse.getBody().asString(); status = JsonPath.from(body).getString("status"); //Last added licens; with the highest id - 
long licenseId = JsonPath.from(body).getList("data[*].id").stream().max((x, y) -> Long.compare(x, y)).get(); + long licenseId = (long) JsonPath.from(body).getList("data.id").stream().max((x, y) -> Integer.compare(x, y)).get(); //Assumes the first license is active, which should be true on a test server long activeLicenseId = JsonPath.from(body).getLong("data[0].id"); assertEquals("OK", status); From 35540f95834f1b59f06635a2f5ff33e6dbce5fd2 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 8 Nov 2022 15:50:43 +0100 Subject: [PATCH 356/608] typo fix --- src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java index 30fc603a998..d6bfdb96777 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/LicensesIT.java @@ -91,7 +91,7 @@ public void testLicenses(){ getLicensesResponse.prettyPrint(); body = getLicensesResponse.getBody().asString(); status = JsonPath.from(body).getString("status"); - //Last added licens; with the highest id + //Last added license; with the highest id long licenseId = (long) JsonPath.from(body).getList("data.id").stream().max((x, y) -> Integer.compare(x, y)).get(); //Assumes the first license is active, which should be true on a test server long activeLicenseId = JsonPath.from(body).getLong("data[0].id"); From fbfcaa4c5fec93dc2e8ea434497a700e5a047463 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 16:30:39 +0100 Subject: [PATCH 357/608] docs,ci(ct-base): add and push README description to Docker Hub #8932 When pushing to Docker Hub from development, we now also push a short description with disclaimers, links to docs and license hints. --- .github/workflows/container_base_push.yml | 21 +++++++-- modules/container-base/README.md | 56 +++++++++++++++++++++++ 2 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 modules/container-base/README.md diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 2520a7e9257..1ef8ba94e78 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -9,6 +9,7 @@ on: paths: - 'modules/container-base/**' - 'modules/dataverse-parent/pom.xml' + - '.github/workflows/container_base_push.yml' pull_request: branches: - 'develop' @@ -16,6 +17,7 @@ on: paths: - 'modules/container-base/**' - 'modules/dataverse-parent/pom.xml' + - '.github/workflows/container_base_push.yml' env: IMAGE_TAG: develop @@ -53,18 +55,31 @@ jobs: - name: Build base container image with local architecture run: mvn -f modules/container-base -Pct package - - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - PRs have no access to secrets + # Run anything below only if this is not a pull request. + # Accessing, pushing tags etc. to DockerHub will only succeed in upstream because secrets. 
+ + - if: ${{ github.event_name != 'pull_request' && github.ref == 'develop' }} + name: Push description to DockerHub + uses: peter-evans/dockerhub-description@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + repository: gdcc/base + short-description: "Dataverse Base Container image providing Payara application server and optimized configuration" + readme-filepath: ./modules/container-base/README.md + + - if: ${{ github.event_name != 'pull_request' }} name: Log in to the Container registry uses: docker/login-action@v1 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - multi-arch makes no sense with PR + - if: ${{ github.event_name != 'pull_request' }} name: Set up QEMU for multi-arch builds uses: docker/setup-qemu-action@v2 - name: Re-set image tag based on branch if: ${{ github.ref == 'master' }} run: echo "IMAGE_TAG=release" - - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - tag push will only succeed in upstream + - if: ${{ github.event_name != 'pull_request' }} name: Deploy multi-arch base container image to Docker Hub run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }} -Ddocker.registry=${{ env.REGISTRY }} diff --git a/modules/container-base/README.md b/modules/container-base/README.md new file mode 100644 index 00000000000..d6f93b14da7 --- /dev/null +++ b/modules/container-base/README.md @@ -0,0 +1,56 @@ +# Dataverse Base Container Image + +A "base image" offers you a pre-installed and pre-tuned application server to deploy Dataverse software to. +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks etc is all done +at this layer, to make the application image focus on the app itself. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +The [Dataverse Container Guide - Base Image](https://guides.dataverse.org/en/latest/container/base-image.html) +provides in-depth information about content, building, tuning and so on for this image. + +**Where to get help and ask questions:** + +IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance happens there (again, by the community). Community supported image tags are based on the two +most important branches: + +- `develop` representing the unstable state of affairs in Dataverse's development branch + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-base/src/main/docker/Dockerfile)) +- `release` representing the latest stable release in Dataverse's main branch + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-base/src/main/docker/Dockerfile)) + +Within the main repository, you may find the base image's files at `/modules/container-base`. +This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. 
+You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. From 1241591eb171609542df9e218388f6bb71e7ae71 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 16:31:33 +0100 Subject: [PATCH 358/608] docs(ct-base): add short intro to base image docs page #8932 Explain a bit (short!) what this image is and what to expect. --- doc/sphinx-guides/source/container/base-image.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 8cf6af1f904..8016ce95f27 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -4,8 +4,13 @@ Application Base Image .. contents:: |toctitle| :local: +A "base image" offers you a pre-installed and pre-tuned application server to deploy Dataverse software to. +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks etc is all done +at this layer, to make the application image focus on the app itself. + Within the main repository, you may find the base image's files at ``/modules/container-base``. This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. **NOTE: This image is created, maintained and supported by the Dataverse community on a best-effort basis.** IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. 
From 7dce7d72a8a70eea7279f540c32fbb84bb3a0319 Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 8 Nov 2022 12:39:46 -0500 Subject: [PATCH 359/608] refactored --- .../iq/dataverse/dataset/DatasetUtil.java | 18 ++++++++++++------ src/main/webapp/dataset-license-terms.xhtml | 6 +++--- .../webapp/datasetLicenseInfoFragment.xhtml | 4 ++-- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index fecfdc2bcfb..f1785a42098 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -39,6 +39,7 @@ import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.EnumUtils; public class DatasetUtil { @@ -547,7 +548,7 @@ public static License getLicense(DatasetVersion dsv) { public static String getLicenseName(DatasetVersion dsv) { License license = DatasetUtil.getLicense(dsv); - return license != null ? getLocalizedLicenseDetails(license.getName(),".name") + return license != null ? getLocalizedLicenseDetails(license,"NAME") : BundleUtil.getStringFromBundle("license.custom"); } @@ -573,15 +574,21 @@ public static String getLicenseIcon(DatasetVersion dsv) { public static String getLicenseDescription(DatasetVersion dsv) { License license = DatasetUtil.getLicense(dsv); - return license != null ? getLocalizedLicenseDetails(license.getName(),".description") : BundleUtil.getStringFromBundle("license.custom.description"); + return license != null ? getLocalizedLicenseDetails(license,"DESCRIPTION") : BundleUtil.getStringFromBundle("license.custom.description"); } - public static String getLocalizedLicenseDetails(String licenseName,String keyPart) { - String key = "license." + licenseName.toLowerCase().replace(" ", "_") + keyPart; + public enum LicenseOption { + NAME, DESCRIPTION + }; + public static String getLocalizedLicenseDetails(License license,String keyPart) { + String licenseName = license.getName(); String localizedLicenseValue = "" ; try { - localizedLicenseValue = BundleUtil.getStringFromPropertyFile(key, "License"); + if (EnumUtils.isValidEnum(LicenseOption.class, keyPart ) ){ + String key = "license." + licenseName.toLowerCase().replace(" ", "_") + "." + keyPart.toLowerCase(); + localizedLicenseValue = BundleUtil.getStringFromPropertyFile(key, "License"); + } } catch (Exception e) { localizedLicenseValue = licenseName; @@ -591,7 +598,6 @@ public static String getLocalizedLicenseDetails(String licenseName,String keyPar localizedLicenseValue = licenseName ; } return localizedLicenseValue; - } public static String getLocaleExternalStatus(String status) { diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index 760f39d7170..8b5c86b9c1c 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -46,7 +46,7 @@

+                            var="license" itemLabel="#{DatasetUtil:getLocalizedLicenseDetails(license, 'NAME')}" itemValue="#{license}"/>
@@ -55,8 +55,8 @@
-                        #{DatasetUtil:getLocalizedLicenseDetails(termsOfUseAndAccess.license.name,'.name')}
+                        #{DatasetUtil:getLocalizedLicenseDetails(termsOfUseAndAccess.license,'NAME')}
diff --git a/src/main/webapp/datasetLicenseInfoFragment.xhtml b/src/main/webapp/datasetLicenseInfoFragment.xhtml
index e7a393a8ae7..257f6b3b12f 100644
--- a/src/main/webapp/datasetLicenseInfoFragment.xhtml
+++ b/src/main/webapp/datasetLicenseInfoFragment.xhtml
@@ -30,12 +30,12 @@ xmlns:jsf="http://xmlns.jcp.org/jsf">
+            jsf:rendered="#{!empty DatasetUtil:getLocalizedLicenseDetails(DatasetPage.workingVersion.termsOfUseAndAccess.license,'DESCRIPTION')}">
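For orientation, a minimal sketch (illustrative only, not part of this patch) of how the refactored helper derives a resource bundle key; "CC BY 4.0" is a hypothetical license name, and the real entries live in the License properties bundle:

    // Illustrative only: mirrors the key-building logic added to DatasetUtil above.
    String licenseName = "CC BY 4.0"; // hypothetical license name
    String keyPart = DatasetUtil.LicenseOption.NAME.name(); // one of the new enum constants
    String key = "license." + licenseName.toLowerCase().replace(" ", "_") + "." + keyPart.toLowerCase();
    // key == "license.cc_by_4.0.name"; it is then looked up via
    // BundleUtil.getStringFromPropertyFile(key, "License"), falling back to the raw license name.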
From c5bc60755f22955fcfa935085123d81d438f4423 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 8 Nov 2022 14:58:53 -0500 Subject: [PATCH 360/608] detect NetCDF and HDF5 files based on content #9117 --- doc/release-notes/9117-file-type-detection.md | 1 + modules/dataverse-parent/pom.xml | 5 +++ pom.xml | 8 +++- .../harvard/iq/dataverse/util/FileUtil.java | 33 +++++++++++++++++ .../iq/dataverse/util/FileUtilTest.java | 35 ++++++++++++++++++ src/test/resources/hdf/hdf4/hdf4test | Bin 0 -> 30275 bytes src/test/resources/hdf/hdf5/vlen_string_dset | Bin 0 -> 6304 bytes src/test/resources/netcdf/madis-raob.nc | Bin 0 -> 150612 bytes 8 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 doc/release-notes/9117-file-type-detection.md create mode 100644 src/test/resources/hdf/hdf4/hdf4test create mode 100644 src/test/resources/hdf/hdf5/vlen_string_dset create mode 100644 src/test/resources/netcdf/madis-raob.nc diff --git a/doc/release-notes/9117-file-type-detection.md b/doc/release-notes/9117-file-type-detection.md new file mode 100644 index 00000000000..7901b478acc --- /dev/null +++ b/doc/release-notes/9117-file-type-detection.md @@ -0,0 +1 @@ +NetCDF and HDF5 files are now detected based on their content rather than just their file extension. diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index c1ba693da1b..e36a78b11be 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -299,6 +299,11 @@ true + + unidata-all + Unidata All + https://artifacts.unidata.ucar.edu/repository/unidata-all/ + dvn.private Local repository for hosting jars not available from network repositories. diff --git a/pom.xml b/pom.xml index c6459cfc55c..8b6f98c5896 100644 --- a/pom.xml +++ b/pom.xml @@ -25,6 +25,7 @@ 0.8.7 5.2.1 2.4.1 + 5.5.3 org.junit.jupiter diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 339de904f9e..dc4f8b97f9a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -108,6 +108,8 @@ import java.util.Arrays; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import ucar.nc2.NetcdfFile; +import ucar.nc2.NetcdfFiles; /** * a 4.0 implementation of the DVN FileUtil; @@ -467,6 +469,11 @@ public static String determineFileType(File f, String fileName) throws IOExcepti fileType = "application/fits"; } } + + // step 3: Check if NetCDF or HDF5 + if (fileType == null) { + fileType = checkNetcdfOrHdf5(f); + } // step 3: check the mime type of this file with Jhove if (fileType == null){ @@ -669,6 +676,32 @@ private static boolean isGraphMLFile(File file) { return isGraphML; } + public static String checkNetcdfOrHdf5(File file) { + try ( NetcdfFile netcdfFile = NetcdfFiles.open(file.getAbsolutePath())) { + if (netcdfFile == null) { + // Can't open as a NetCDF or HDF5 file. 
+                return null;
+            }
+            String type = netcdfFile.getFileTypeId();
+            if (type == null) {
+                return null;
+            }
+            switch (type) {
+                case "NETCDF":
+                    return "application/netcdf";
+                case "NetCDF-4":
+                    return "application/netcdf";
+                case "HDF5":
+                    return "application/x-hdf5";
+                default:
+                    break;
+            }
+        } catch (IOException ex) {
+            return null;
+        }
+        return null;
+    }
+
     // from MD5Checksum.java
     public static String calculateChecksum(String datafile, ChecksumType checksumType) {
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java
index 01fb8aad6cf..e710236e446 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java
@@ -373,4 +373,39 @@ public void testIsThumbnailSupported() throws Exception {
             assertFalse(FileUtil.isThumbnailSupported(filewBogusContentType));
         }
     }
+
+    @Test
+    public void testNetcdfFile() throws IOException {
+        // We got madis-raob.nc from https://www.unidata.ucar.edu/software/netcdf/examples/files.html
+        String path = "src/test/resources/netcdf/";
+        String pathAndFile = path + "madis-raob.nc";
+        File file = new File(pathAndFile);
+        String contentType = FileUtil.determineFileType(file, pathAndFile);
+        assertEquals("application/netcdf", contentType);
+    }
+
+    @Test
+    public void testHdf5File() throws IOException {
+        // We got vlen_string_dset.h5 from https://github.com/h5py/h5py/blob/3.7.0/h5py/tests/data_files/vlen_string_dset.h5
+        // and named it "vlen_string_dset", with no file extension, for this test.
+        String path = "src/test/resources/hdf/hdf5/";
+        String pathAndFile = path + "vlen_string_dset";
+        File file = new File(pathAndFile);
+        String contentType = FileUtil.determineFileType(file, pathAndFile);
+        assertEquals("application/x-hdf5", contentType);
+    }
+
+    @Test
+    public void testHdf4File() throws IOException {
+        // We got test.hdf from https://people.sc.fsu.edu/~jburkardt/data/hdf/hdf.html
+        // and named it "hdf4test", with no file extension, for this test.
+        // HDF4 is the old format, the previous generation before HDF5.
+        // We can't detect it based on its content.
+ String path = "src/test/resources/hdf/hdf4/"; + String pathAndFile = path + "hdf4test"; + File file = new File(pathAndFile); + String contentType = FileUtil.determineFileType(file, pathAndFile); + assertEquals("application/octet-stream", contentType); + } + } diff --git a/src/test/resources/hdf/hdf4/hdf4test b/src/test/resources/hdf/hdf4/hdf4test new file mode 100644 index 0000000000000000000000000000000000000000..4674fdde19487d5c28b44f54562fbe5118284e0d GIT binary patch literal 30275 zcmeI532+ou8pprf0b@it1YvblR8$N}IE3IzGLs;|5Rn8#K_Mgq2@DxXCJ;a|;g(C} zlFNX|jUXx_9$>r>FT@*9sC8@C+TGf+T+3p2E!p4K(;0HiWCsbTq-W}v|35FUyI+6r z&Fh}mmG@%9nuo|i(buICBGP0uZfMkYnIuy19zHy3o8dpicZ=E~lSM{1!Q03)1%EBR z9eLEJ;X9M(UVN#MzFqFaFE#RSGyHBd&s5U?YUa6LWK0-dkEK4!$g^F_@JaY+($(AX z-8gPJUiYu*>iL_IJ`KOeOjp0(OrMVb!Aw{GlbJq4#FdC|%YM}N({vdvGx6k;@X&pR zpN(%7dRRh9e*_=HInKgw#ivsq{_gr-^RplIKjJ%+K1XD%#z)JkJ^`=ge*izqOjqwV z)92zhn(69ynCbKI-`&wkW@hS%~h5^=}iwfyQ^;I;e@;rp8D>IazVi}9SNkzYOA82Ojr518rd z51Hw-i*e!jmh4}B8#8?w{x-apM|~E)JLk0=uVvQrQcqcp^I{+4G;Ufi596Q2_oJLE z@IT`t*bja@=Wm?XN_;oGo|pQY@p@jX@E$W=J?+>?UyXmtOjrNBnZ5@9otduwxS38H zDT>2)VE^g|X*%Vvz*A2~`PFNDE~Q=~@H_EZ{5t*}nd$0ZH`6!Z|7)h>JsKa)zY*U9ujN;tW2SGy6JKNh z>M5&HFPrh7;WAa?ytd(KAJUz4^=}*L zJ7hav>oF~Kza&xR7vYad3+X9xaWD1u{pwp^hHpmtP54eCA))v`inJjn-332h7>&=zv=(v1A-GtK-Z_2l^2J8g*_ZJF)~&Z4m+oJF>T9z8l&br|)kUMJ{c=!in3wRT5n()h z7v)Kl)Pk{ta$NCo@kVAXj)c(HM)`6?4J1pAzZ*H(m(e8}rPqCHPfqX7E6VQ?!wO1E z{TwdLI9wAk{v3H~hl(-|7aibmlP38&ocHX`<8b`B_Tk)xPWpOY-k60OF$?#_EMj10 zdUlpXrFc9=)dNyFtkT%Z_z@z}8fJc04|g_P6|Yx_5e7MrvkC zzq%(A(jq!UFSEugio2uc)!|N}xttfZgZH+RZy6oBkNb*fX)5PStX#m={zAEkoXy!& zxE7Hk=hUdLUs}yaPgI&^)2cnyY7Tm9f1$1nV(WUUIq0qZ9Xl)EUF4}fL8- zP&M1C*~yxXrdHclhnj6$PtC?iOKBw+(}%T@OUT)lJ}1H`RRdLR*vm&xobXldDOYVu z&|CWpZ!pIWT(!X*JMbqs!(BMamhS(v+tR4vmeg>|U}~5Eo-jTg{;xMphxIOSxwz7l zIQ@L?Tid*?LpyYB9qp?_>u9gv7ixa)xkh)k4adAz zu9NG@-H|GdFp65I1>{w40+B{}^|mF_SKa_kpwV^`sqPZ4=x3@4RK7a9jPQG3{=44# z`;N%ux=pAnO{fMmp{{bi(TCJ%LcU%^?;m0$PC7}vBuJukmM(13RhxzP{OhzMkF(el zmzy`zD7m-b;l^EG^vRZ#ot~YMR=2-P&r7ngrFRbmEv)nkTK@r}xdx zlX1Hg!|+5qxk_$eAnHbT7418tNJnb#>NmLBx1WudgxIH?jpNilhS@Tz-Bnh#i_rZ> zyYP2?NeXa<(Msx7CX$RM^CMaPI;ezWA}IpRzy^HlN=b(PFdW7~8O(u2umaY>7T5)Q zp%MKy9EbnFPw=ydLx48u2!${R2X5jVkwC+AL_-s33gZ0W^aP z;UZ`bEr8oxM=Q7(T0=gV<3^nzqa0Xq;3M=GR2Z@2~eKsxk= zTOkAbK_=V={o!^P09lX?17Q%{0fXUAxC@5BP`DfJfnhKla$p4HLLN9_B#eT5D1gy0 z23#-}3c(HIU_2B7x8e>Dl)waFh}=;MlVCDTfqUUTma#qFd2z=r(jax*dHSeH`6^ z?m%~>Qb8+`_S2Hk`1L7zpRMfak6(dW?T(C5+T z(HGDc&==7c(S7JXbU(TueF=RDtwbx)m(iEeSI}3`SJ79|*U;C{*U{I}1Ly(t4fGB4 zAbJpe6MYkX3w;ZH8+{vn2Ym;97kw8!gdRfQL*GLWqleM=(f830&=1fL(GSs&(2vlM z(T~wj&`;1$(NEDM=n?cY^fUBx^mFtJ^b7P$^h@+B^egmh^lS7edbCWemlXjk0#*d9 z2v`xYB490ts0V@Jl1gr>H5wId)MZk)H6#**(Rs^gF zSP`%yU`4=+fE58N0#*d92v`xYB492`H9)W111pX&d z_@*14R1hZFg@XgW2Z8M*v5qo}Isz`V`K zDI;Z+Rgmjt?-oWS%Y0nd2$Xx8>*L*~(oy!Cqdub3#tBC#T%QOqrhi za;GiVT~b)?DVh`~Qf|H)ZQ#$b6E6#?`8Gak@y2Z6`cW(4bUiZR96RY#JDvJIYW}`5 zcmmD#Su@i5o8(kaJGEk2xvdCT5%@htpla^OhSub!-Y7KmerxZqD%Q2r8_Vx{Z49Yo zMHUT2nUwN#qS{9Miaj|=XYD!8`KBEB%gw}q^OO?I(|Ml2EcxbA0cT|)fGKC|d4hM{ zf%_Wrc|uq)lkwLx&^)a2WW)DA6Rx?8fWJwigE@9R1I1y z;a-`_0(Og7pl-1&VM)1VY`2`Z^?X5_%IUY1U*Y*e1r_guhM>pS%&T)tJ_?)Z%)UQry6jh@04UGw*YB!2rFXn#u~N3t3Hbt!!b9MivnWxM0Bl E0NB#lt^fc4 literal 0 HcmV?d00001 diff --git a/src/test/resources/hdf/hdf5/vlen_string_dset b/src/test/resources/hdf/hdf5/vlen_string_dset new file mode 100644 index 0000000000000000000000000000000000000000..dd20547f8e9a5d0597c76763b2618131c96033cd GIT binary patch literal 6304 zcmeHI%}N7749@JfI#k4;lX|KzAo>Q1ySfLl#rEK_B2{~_uv@8*pf~Xedh}6zD?6Da 
zW$`FT1-}#~nIw~){rL9lYmuF;xAt1Z<~=oNGlsT&sm*pIV|%L)G;r!))PE!rIN~3X z9;Ypx|Glq&mFE;XZN7OOM~4lGyd~iO6#_+m(&ZPv*0(m6ek#uljI4JQL*rv%g&%rH zE0MplFhhmO&dC#$mf^b*OGBM2yZaQG&i9l~VQ!Or+$V{oz4oVWpwvE=LAq{-em0)5 z_I}qVdXD>LExpT5#$U8vmfGpEZi43m0U+?#5m*fuoT*r_M%MQ%9nD&uHLhM~X+J;h zTtxj;!51x+bD>PtbU2&@Z+`=Po~;^}<#>8PCQp_hRqg8O_Ff00e*l5C8%|00{gt0&|>?K0E*b literal 0 HcmV?d00001 diff --git a/src/test/resources/netcdf/madis-raob.nc b/src/test/resources/netcdf/madis-raob.nc new file mode 100644 index 0000000000000000000000000000000000000000..d0cae0d077d12c7dd1279f63274997847c5a627a GIT binary patch literal 150612 zcmeHQZEzjcd0yGrl5A{jQf%{;ED*o}$w(H)HU%X4%CcnqA?vy}rD@DccO`AOU%4L^ zqBLkGGy};@!%RCAoRYYZ8JG#3Ap;Dgp+x~^LOV^GeoRZ!q;;pA3_tk6q(7Qwn7Yq< z_U_f$bI-l^thkb{#e2u=-Mf3v+4nr>Iq!4M*>|sE1A{G=Wzn_{+r`+{=G^fk#T1VG zPg}}KA97E-NqpUctv&Be9uH37`xB*eC#7?TaIQ6VGMy+DbP>L{rHQWvr`>cRlbhM+rI8XJ8ynR%hOKQplS<)w7min9yJ67Fz_bM)bX^N%X=A70m{2=Faz`)WRK6}3vMcJA9BX*T|21k*6~Dgz$v(t zkJ7g7?80YPS2vE=dx;ZH&U40+t}u|Ux0dk1p|zplrQA>ozl_%WO!_!3QJQs0r*J5f zMk>FKRK^CBamXoj(9X9K=_b6SJL2ZNOoFmm)D~#}jhnFFK-c!C6GtZ|@@|2y&R@R- z#}^L_oJwZWlc&;X41A}1bR3J>K+3TbZhk!HWr3NU$|T(6?Y50e`C=;N0L9ZzvM4G? zW#rrmH|M6ugEDUIKRG-Sw~xEyQ)%xDMc0mJ-EnWi8%OhI()Nfme!ny6+S`xEM?Pck z+0(hRyK|KI(vGsq1()pk#JwG{j(Eq3j(a+ebsXrhVa`5WvepG@Pu)vjh3y-TWXMJ* zn@xJI4~LYF*x{$!JmTc@ZbH;f^iZPtd)!mbvw6@#L)Eh9SUGKnQoh~YVUHCH_Jm&qs4?m0aYNwLV~6fHTrT6% z;dH^xWiy}{?kW%$*XC_IP)@sbI6a;$CXmic+s^p7n=RN$FE7|MpI$VHXoFDtocnoq zd_Mi=W3f2d7a|?*L^$8<6bqS@FbGX3+(zCW&!h{Em-cV&I^$Ees2>>zKjN1q5r)cH z2RwEPnh3b9OG4>RiI%2xH=b~kUcxKPh^86I*RmN2Agg8%mFD>=M_=w-jPKjOyzi=$puWN&w<~vWh&zCxrJ)OxV;1ediv;c~3 z?U!>Tv(GE!#huW`t-d%FufE|W;ZG3Jo$yKD^ zN*p*uZ5;O|(?Sr^1#p76M0~giM4%3ezTH1BD!y~B#piH((LF_Z-Q8VE*t0O^v>x%PaSdYijfj zU0%6wTvMZO=<>>aV_Rk4C}#;Uk^2S4A(g(=?-v#M*NZca=BMHxtuej%7xW-EH-c|o5@p;ro1=A zi<20w&>&yGGG|s%emj4L1v`J*t~RK z5>?9Eq2z^0zWQUlQC5GcgdqQ|Rq}f@oWd}G#w)e*@2Zj?QwuS8sF(k?QvT}{`==y{ zkjyUEdv__{%}Ty}sXJ6LyPTigBH^}E%`c?FDN99CNJWmMLN=pZ&fZcv*DB>qlVs$D zWQ6jyQ@a)O7%?JaDetZlQ^1+bn0+V4x!$Bl*yG#Hpp8#3iIN~jv6LS=Ax&1Q>xw@c z5jrHL|LA>aJ?ilL*w&>nbPZ;@R55&L{wn6}rv|-b63)A1bZI^(($F!sS0BN?-5(N& z$ueD}_G!l!%=>j^Q5rf=9~{3mg0ddPG3J{2!l(;m6XF`Bp|Wb)bRHk1fjyU;wOSs0 z7sst$`gAaV4enF34CJ8?%D~q0^aI$h%jVqEiY=6;W;ye=h;kl6`gYjjFB4F1zutB0 zNM@!>39yS$uQOL6{W>on zDr6xz&r&(Up5ZhWQSi;jhxln~&n)U?BYit@XTBK0uc+mmrE;cI8T5)^si@3TvS+op zkF-#M@5JRxPaZyMi>`!K1>_uab2y8#G;^V=Ph^tClxz3fnF*WnWs76UU`2u)6F%#- zeXr%q-^8_mqmSTNUdtgn=(84EfL*)1sDkst*1*o;1Zat|Z^>4=b}P6~{ctV%G}d=; zZ7k?FYGKWK*{{8X6NRzooK_rP%5*Et^09JYwKH3;5UbG z)y&p8q@%Xlk2OEa6UkP}1XzQ0Hr3WODHozorB<=^&37zo|Gjf`UFDSy*SC>QUjLJA zr8d&F|AzQXI+LY|Vc>NuF}*eqzn)HDxzob}oxQz#@9wV7?#^zztLt|9x1;MbWTN12 z8`>9ETdg@a8I&m7QLP_d7S9yZ2`@cqkGeF4cN<`jiW&8$#r&LfrV*@mdY@&q&m(o+xU5KuT}S0Enl~B-_~?7br|)*p%Hdh_x6Gc-p{2=$3cn)T_XMt+ z*mn-;O7@+^p%HHN?Gr`SwC@n^Wn$ldmFzn_ZQia|&*OL4dB95*Q?{73N=;kSHp0=w zw)aZ5(bFRw8ew%lXZ&lG4qzy;+&~n@57cg*&H}cR?TykxK~SquNc7!4jv`B zSizj{5XsULwn~4z9*fEa)F2N>LibT^Cv8~OJ8M}3-@|{En5x==e*E+YNY^TC9~huK zL4ONuujU6kwYCdfQ5*P~(9<3&89E{6Q)NG5ZJ_5f_sCCX^uF}+17aL)_ZJE|4-YKx zkRt9+`2BLp=51EXH-@lp6LrBNAdc0#P+RDQ8(}APK(D_bWL7wQ?JSEY3en3y-d9(uZ^kKW5YB5`OmKhy68<1+H0MgnYDWL|m5p37zk$a-N2h zy7a-?p;}I`CU|A#T6h@&6lu{tcULKUPwD(kb=SE=MT#bh%;48{%KGsV6jd-hSp1c1t-?t? 
z=V+YLDr}FY^coHR+X|Z@B<_;yl5_L?_Y2aJxjRj=C&trk69Sc0) z1%I-pkcd1MxM9GZwns7u%d!XDEIbFj)DfCIS6`o(xuG2sz$R;7mi{*x6MXwD(zS{@ zL>m+Aty_m0g9F@CT?1(FI{mkQg?qG#dPN%>?1@q@Wssm-yItw?e|fzxil0~WErQ;^ zK;zwj15xxo5(gCF*KuGa&-K-IY@)TE7yS7F`DX-Hf(udB?{ACZ2NXe9*Yahb&3fLn zdj7&3II?Csx}N)y2m@XaA+DR=tDIP0ihDzxkBi&tpR?12*vs_4iI%@e5%c z5zj{S_SaHzRl^ry9sk8gQKvRhr)WC9)2vR4g6p5ZvWCw;=Qn-sUh6%p_60oGxd=SK zbNCm}N_>c->4tnz)LhR8)^mUD$CL}T-Y@znTo7aYl5dEr?_Fm8A*Af84`1v1a4@op zf9B`&%Db6mI0a_(%zp`<&wpnGcUv=^k33Jm%8wbyx_Ft#MAO?v%=ImZXcpD0q3IOK zS%ouFJUjf|%X3KADr}FY>9OBkKCI=0?PxeIuMzXHMbh$D zXuKM5A&Qn8aDjDv5qPjjI{u13#wY(AeV*P?-!GI!haSDF8m{_mo}V}E{QQ-3Xuq}D zT=3dOIR=+vzG}t5Un|98$}eAtRd(s!3zZvc45oIy-8>ea-f8(RjYSYgOmz@~PPWr0 zr~|E0>EcP{Sa@Xi-!AZNUzS+-C#{z6Jx{j!b@9c#R1fUR^)ZQu*Vac-bKSZs& F z%jNOG0_R@Dh)H{#m{{JKCCTdN$ZFv75;(N^mJNxeK>{!`GX&0D*Za* z{Oghx<2m_Ri&n4AW9G=uy?Y;QM9dJ?A(CDzF>_j~tFNyu_If%r`=_rF=6-!5Yqk;x zrLB`9+uGP?)@{9RFLR6>@%>+Ed|Kv5L|r%Gha&A-ez3MzYWp(B#Su4tF2?(T?^uBL zE3t7}|Dni#P5;i?-?Z9)4sHb6pCH|&Lu2jVQ6=sI1yvmcsPp!^V*oP9EJlua|36tI zM;sB=CDIt6T8x~Q`08K)_f*FNjf|1QSm1x(!9CjjdPN=!REv?))=e1{=;m`gCeV)w zv+K0U#a~|z=03_d4ce8{vtd$5oKOqEnZIR4-}EF z<-b|un^xn`ZvscwPSKAg`@anotp5>jYOTlL5PEO2(?$UrZTHJmxgozNsh!`OzyIFS zWv$vW3tRC%Fx4TX?Li%AuUn(2^8S06?kDw2^NXqLhpbgwUnHCTI#uX;P_JrhvsK=6 z*49grb?sb^b$wZMow&XALzxGPt~ZhgimYpSz`DNVx=tLpyhG-IGG`|a)LXBw@{YUK zA1Jb3F?W}JH|u&+>iYU~)~YK$3tp_9rm!`mv+A{k@Y(q^Q^58Yd3LawDIxpa+&Q5dv}evyt^Q`e?;vzXn)r2=56~u-;&#Y z0lFQIbJzC&nwqWpW}bgHt#0rACvXLE?!iycm!F-}@3&rzt(tCMDaE-xzf zJw}bC)atc)e7m*DTDAXW+z0XPbo-EA2e#ALL${v3LvMSn*VPvF`S?Ys%iCnDUzZBK z4(dbgp{tJ^uP?QQRG$jD*3PqduDvXCZNX&`9}%>_qSqh2{VU{J+x|SyUh;XiX#02N zzOU#twSB#H^PB7XvyfcZ_06o;O{v$`S?C0<$so=>vR;2O9Oo{{L|L*ccu_aD%{;!H zcs}?jY)5>1WWDaLYP%$H6*Fw$r2KuBM*n6EbbIhg>briOBI~viw=cDcR4+xkb#Y9r z+j`wz<`{Ru`9GEU5mDET_@PLqxH?4{ms!ow!R*mmOk0eaSt*!IZ!e|!BpNP<~85KzYf zjf`!_7~nt>^+Ier)hqHCpjzC%)IL((l)(Vqm;ZKK)BMdB;yAs(QRYHKy^qEPMfj_5 zfwjL<`{idWi}0R<@O85?4;G;9yCeC$u*lc)fLTRcB3ap1!Ji>%{UhhpI*t*D{lYOIC!xjO7IjDw@G>oAQo5C<1o!~J;A zcvr?D zeJKXk@B0hT?apfcSn_A}G+Xu0JpXRm`SVynb!qGu)R%a^)VfV^4v2@Nx>V?P zP@ih?aJLxLhjqKTb^Cp}{T1DAMEkRDH?MAgD7XCpzTiW?i$2u$_2S{=c-X)ntNyH> zZma&8b-QVG`ylB6*3C&Ph$7_}_P|fjUbk+3ogjt$4tciv#2Wg+bKncs&?9U5rvx2>HRymN z)1le8$_aIirU7f{chACp@%N@8>bd{B8LIs%$Cq5o@21!UtfMYS%l+qH_3FerPQUNa z#B1qy|FO)0h&pb@0oL$lZvSt|?Y{sG4?j-{e6#A?YU{V^t9f4DwDa=2>HX}M{YU!a z*z<=GU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE>7y(9r z5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE> z7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EE zfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u z1Q-EEfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-okU<4Qe zMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-ok zU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y+d2A#k2Qi~u9R2rvSS03*N%FanGKBftnS z0*nA7u&fARnW1F`Ej4ge7y(9r5nu!u0Y-okU<4QeMt~7u1Q>w}6@eC7ARyzHbD?0l z@88{NpKy|1!Yj3N-20G| z&AaxfQ*i5;v8U5M>7^5Pe9Colb<(QMw-;D>DYtXT^(LnZk)c+LGyZ<(Gr#;G=wFt# z`u+V!`s2o#O9}Tq{V(hK@A{9Sethr$(cjNJ;P1ciQ~&y%*j+;TGsyGoU``-dDhzh?^$bb<2}}({f;$w+pIMhpRxvj z?{Q;)6qi1V2e}U4qijm{rTxl0TpAuS&Wtm0AokT?OvJwW)S1|06BDt=E)Owf4O~*lKj&yS8q# zYLydQ>-)=%M)uux@@=E?f^vD^d_J)6x1T!GH}lk)-d{{i^!D`h^uE;7)0f9~4s*Uu z*w8=4z7P8@p06>7n~%fXCM%~+#M;nd%-OQoXsxJN+CN;{2j}SeAI%!i*J9EDNf~mi zMyObXP078)UmL~seOS3ODeFzcbl!9zeRHKT5Qx$LBsQ#B_rF)#pT(ZG7qM-!EJ|xSUt@n1ZX+6Jn(+4fhUw<|tkovv z&AoB{|BTW-Y?iLiG~HRN#pJ%It`RP8oHw(t|7aNZYBu!snw3{AKc6?lIVw)#`P%(+ z*1%YwH2}>RxC7f-9Iu|WVn4%}=q$EB!IsDNIcx{9?ZdVc8`i;N?Rf6?Z`l6E+*g_U zG)`ysEVCJLk{+)Hhva&i;e)YxFb9|e%mL;AbAUO(9AFMG2bcpFHV4G}6v4X`kKw(FFaP9B>><2M 
z@etmlC}MN5jm=uI6WHQ7{w?h9#rqY%i!^w@BKA8t7x7(-W_*6jHZ}IyyA+>AhqN`z z6DxYQ{UyUqO{;!ymO8N=i~u9B0ugA$_^S8Ky+$#ReTN@2O4s}IKQv4C->;dab8j|F z_ub8A>AwEgX6c@IW7Y)UJAQf2sJy<`f9^9W&ooW%%U?9IuYcpKkDHX)_r;BKCh1Jm z^l$!8v-`f1Gs3U;st1kIncBzcF6_Apw;#73bAUO(9AFMG2bcrQ0phJJ7-zFv)JhOynce;_qt(XqVEZ8INxuVeyidfey;+X_-!vb*M+UG zw9zp(ex}^mXUZ2cp217u(%x#44?WXv)Yc?bVlx;4Mqni(&}hBq@^}o%9AFMG2bcrQ z0pFb9|e z%mL;AbKv91feU#ay&Sf$#5o?Fb9|e%z>54f!OCP zD|XrZ@4v-jVjdIx#+G(c)9hG?zx{^4_ZH0UOMlMmOQ;NO+E1-I^{@Whu3FcFYyDR~ zG7=m&dM|#zuWyC5QxV+DuB5AZ9}8W`+t7_ Date: Tue, 8 Nov 2022 23:36:39 +0100 Subject: [PATCH 361/608] ci(ct-base): update action versions #8932 --- .github/workflows/container_base_push.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 1ef8ba94e78..519e135f944 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -38,15 +38,15 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up JDK ${{ matrix.jdk }} - uses: actions/setup-java@v2 + uses: actions/setup-java@v3 with: java-version: ${{ matrix.jdk }} distribution: 'adopt' - name: Cache Maven packages - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} @@ -70,7 +70,7 @@ jobs: - if: ${{ github.event_name != 'pull_request' }} name: Log in to the Container registry - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} From 7d4388ed5022e64a1db721160169d93a2c565007 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 23:42:20 +0100 Subject: [PATCH 362/608] ci(ct-base): fix step if-conditions for branch names #8932 Github context offers ".ref" but we need ".ref_name" to match *just* the branch name. --- .github/workflows/container_base_push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 519e135f944..5a7280ce3b1 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -58,7 +58,7 @@ jobs: # Run anything below only if this is not a pull request. # Accessing, pushing tags etc. to DockerHub will only succeed in upstream because secrets. 
-      - if: ${{ github.event_name != 'pull_request' && github.ref == 'develop' }}
+      - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }}
         name: Push description to DockerHub
         uses: peter-evans/dockerhub-description@v3
         with:
@@ -78,7 +78,7 @@
         name: Set up QEMU for multi-arch builds
         uses: docker/setup-qemu-action@v2
       - name: Re-set image tag based on branch
-        if: ${{ github.ref == 'master' }}
+        if: ${{ github.ref_name == 'master' }}
         run: echo "IMAGE_TAG=release"

From 3d790aacc7ffd4f44e8fb9a4880400960b52b48d Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Tue, 8 Nov 2022 23:50:31 +0100
Subject: [PATCH 363/608] ci(ct-base): fix failing image pushes #8932

The login to the registry needs to be explicit; otherwise the push fails to
acquire the correct token and is rejected with "insufficient_scope:
authorization failed".

---
 .github/workflows/container_base_push.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml
index 5a7280ce3b1..fc0a3564e50 100644
--- a/.github/workflows/container_base_push.yml
+++ b/.github/workflows/container_base_push.yml
@@ -72,6 +72,7 @@ jobs:
         name: Log in to the Container registry
         uses: docker/login-action@v2
         with:
+          registry: ${{ env.REGISTRY }}
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
       - if: ${{ github.event_name != 'pull_request' }}

From 609688092192e674686243096fcc45a9e4086826 Mon Sep 17 00:00:00 2001
From: Oliver Bertuch
Date: Wed, 9 Nov 2022 15:18:48 +0100
Subject: [PATCH 364/608] docs(ct-base): rephrase slightly to match wording in main index

Co-authored-by: Benjamin Peuch

---
 doc/sphinx-guides/source/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst
index be32e94d80f..0cd01b8a5a7 100755
--- a/doc/sphinx-guides/source/index.rst
+++ b/doc/sphinx-guides/source/index.rst
@@ -31,7 +31,7 @@ The User Guide is further divided into primary activities: finding & using
 data, adding Datasets, administering dataverses or Datasets, and Dataset exploration/visualizations. Details on all of the above tasks can be found in the Users Guide.
 The Installation Guide is for people or organizations who want to host their
-own Dataverse. The Container Guide adds to this information on container-based installations.
+own Dataverse. The Container Guide gives information on how to deploy Dataverse with containers.
 The Developer Guide contains instructions for people who want to
 contribute to the Open Source Dataverse project or who want to modify the code to suit their own needs.
Finally, the API Guide is for From 4a79dcbddde84251c4a975e3b858d00171ffef66 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 9 Nov 2022 15:25:33 +0100 Subject: [PATCH 365/608] docs(ct-base): apply some language tweaks to docs pages Co-authored-by: Benjamin Peuch --- doc/sphinx-guides/source/container/index.rst | 2 +- modules/container-base/README.md | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst index f6c99bfc19e..6d22318ad03 100644 --- a/doc/sphinx-guides/source/container/index.rst +++ b/doc/sphinx-guides/source/container/index.rst @@ -9,7 +9,7 @@ Container Guide Running Dataverse software in containers is quite different than in a :doc:`classic installation <../installation/prep>`. -Both approaches have pros and cons. These days (2022) containers are very often used for development and testing, +Both approaches have pros and cons. These days, containers are very often used for development and testing, but there is an ever rising move for running applications in the cloud using container technology. **NOTE:** diff --git a/modules/container-base/README.md b/modules/container-base/README.md index d6f93b14da7..ce48eae8a65 100644 --- a/modules/container-base/README.md +++ b/modules/container-base/README.md @@ -1,7 +1,7 @@ # Dataverse Base Container Image A "base image" offers you a pre-installed and pre-tuned application server to deploy Dataverse software to. -Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks etc is all done +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks, etc., is all done at this layer, to make the application image focus on the app itself. ## Quick Reference @@ -17,14 +17,14 @@ provides in-depth information about content, building, tuning and so on for this **Where to get help and ask questions:** -IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. +IQSS will not offer you support how to deploy or run it. Please reach out to the community for help on using it. You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at https://dataversecommunity.slack.com to ask for help and guidance. ## Supported Image Tags This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). -Development and maintenance happens there (again, by the community). Community supported image tags are based on the two +Development and maintenance happens there (again, by the community). Community-supported image tags are based on the two most important branches: - `develop` representing the unstable state of affairs in Dataverse's development branch @@ -32,7 +32,7 @@ most important branches: - `release` representing the latest stable release in Dataverse's main branch ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-base/src/main/docker/Dockerfile)) -Within the main repository, you may find the base image's files at `/modules/container-base`. +Within the main repository, you may find the base image files at `/modules/container-base`. This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. 
@@ -49,7 +49,7 @@ Unless required by applicable law or agreed to in writing, software distributed See the License for the specific language governing permissions and limitations under the License. As with all Docker images, all images likely also contain other software which may be under other licenses (such as -[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc from the base +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base distribution, along with any direct or indirect (Java) dependencies contained). As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies From c4e5028928302b183530d23159ee5e0f807f08b0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Nov 2022 11:42:59 +0100 Subject: [PATCH 366/608] refactor(metadata): rename CodeMeta softwareVersion to codeVersion #7844 As the citation block already contains a compound field "software" with both "softwareName" and "softwareVersion", meant to describe software used to create the dataset, this name conflict must be resolved. By renaming to "codeVersion", the semantic is not changed, as this metadata block is about describing software deposits. As the termURI is explicitly set to "schema.org/softwareVersion" it remains compatible with OAI-ORE and other linked data usages. A future exporter for CodeMeta might require special attention for this field. --- scripts/api/data/metadatablocks/codemeta.tsv | 2 +- src/main/java/propertyFiles/codeMeta20.properties | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index 029ca2355ec..3c872426387 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -1,7 +1,7 @@ #metadataBlock name dataverseAlias displayName blockURI codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI - softwareVersion Software Version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + codeVersion Software Version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 https://www.repostatus.org codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). e.g. https://github.com/user/project url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. 
text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory diff --git a/src/main/java/propertyFiles/codeMeta20.properties b/src/main/java/propertyFiles/codeMeta20.properties index e203c1e46e9..5f788df4e83 100644 --- a/src/main/java/propertyFiles/codeMeta20.properties +++ b/src/main/java/propertyFiles/codeMeta20.properties @@ -1,8 +1,8 @@ metadatablock.name=codeMeta20 metadatablock.displayName=Software Metadata (CodeMeta 2.0) -datasetfieldtype.softwareVersion.title=Software Version -datasetfieldtype.softwareVersion.description=Version of the software instance, usually following some convention like SemVer etc. -datasetfieldtype.softwareVersion.watermark=e.g. 0.2.1 or 1.3 or 2021.1 etc +datasetfieldtype.codeVersion.title=Software Version +datasetfieldtype.codeVersion.description=Version of the software instance, usually following some convention like SemVer etc. +datasetfieldtype.codeVersion.watermark=e.g. 0.2.1 or 1.3 or 2021.1 etc datasetfieldtype.developmentStatus.title=Development Status datasetfieldtype.developmentStatus.description=Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. datasetfieldtype.developmentStatus.watermark= Development Status From d79b4aa3ad1f99ab61d0330462c41c36f478514c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Nov 2022 11:44:57 +0100 Subject: [PATCH 367/608] style(metadata): rephrase CodeMeta storage and memory requirements descriptions #7844 A slight rephrasing should make it easier to understand what is expected as content for these metadata fields. --- scripts/api/data/metadatablocks/codemeta.tsv | 4 ++-- src/main/java/propertyFiles/codeMeta20.properties | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index 3c872426387..b65cf56b1af 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -18,8 +18,8 @@ softwareSuggestions Name & Version Name and version of the optional software/library dependency e.g. Sphinx 5.0.2 text 0 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE softwareSuggestionsItem codeMeta20 https://codemeta.github.io/terms/softwareSuggestions softwareSuggestionsInfoUrl Info URL Link to optional software/library homepage or documentation (ideally also versioned) e.g. https://www.sphinx-doc.org url 1 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE softwareSuggestionsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareSuggestionsInfoUrl memoryRequirements Memory Requirements Minimum memory requirements. text 12 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements - processorRequirements Processor Requirements Processor architecture required to run the application (e.g. IA64). text 13 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements - storageRequirements Storage Requirements Storage requirements (e.g. free space required). text 14 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements + processorRequirements Processor Requirements Processor architecture or other CPU requirements to run the application (e.g. IA64). text 13 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements + storageRequirements Storage Requirements Minimum storage requirements (e.g. free space required). 
text 14 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text 15 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions softwareHelp Software Help/Documentation Link to help texts or documentation e.g. https://user.github.io/project/docs url 16 #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp readme Readme Link to the README of the project e.g. https://github.com/user/project/blob/main/README.md url 17 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/readme diff --git a/src/main/java/propertyFiles/codeMeta20.properties b/src/main/java/propertyFiles/codeMeta20.properties index 5f788df4e83..92153ccb10a 100644 --- a/src/main/java/propertyFiles/codeMeta20.properties +++ b/src/main/java/propertyFiles/codeMeta20.properties @@ -52,10 +52,10 @@ datasetfieldtype.memoryRequirements.title=Memory Requirements datasetfieldtype.memoryRequirements.description=Minimum memory requirements. datasetfieldtype.memoryRequirements.watermark= datasetfieldtype.processorRequirements.title=Processor Requirements -datasetfieldtype.processorRequirements.description=Processor architecture required to run the application (e.g. IA64). +datasetfieldtype.processorRequirements.description=Processor architecture or other CPU requirements to run the application (e.g. IA64). datasetfieldtype.processorRequirements.watermark= datasetfieldtype.storageRequirements.title=Storage Requirements -datasetfieldtype.storageRequirements.description=Storage requirements (e.g. free space required). +datasetfieldtype.storageRequirements.description=Minimum storage requirements (e.g. free space required). datasetfieldtype.storageRequirements.watermark= datasetfieldtype.permissions.title=Permissions datasetfieldtype.permissions.description=Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). From 8d5edf23a13631e878c413e55c320cb704a579b5 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Nov 2022 12:35:50 +0100 Subject: [PATCH 368/608] feat(metadata): add CodeMeta fields to Solr schema #7844 Adding the fields of the CodeMeta block to the Solr schema to enable quick usage of the fields (despite being flagged experimental in the guides). 
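Once the schema change is deployed and a reindex has run, the new fields become directly queryable. A minimal sanity check with SolrJ might look like the sketch below (illustrative only, not part of this patch; the Solr URL and the "collection1" core name are assumptions that depend on the installation):

    import org.apache.solr.client.solrj.SolrClient;
    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.impl.HttpSolrClient;
    import org.apache.solr.client.solrj.response.QueryResponse;

    public class CodeMetaFieldCheck {
        public static void main(String[] args) throws Exception {
            // "collection1" is assumed to be the Dataverse Solr core; adjust as needed.
            try (SolrClient solr = new HttpSolrClient.Builder("http://localhost:8983/solr/collection1").build()) {
                // Match any indexed dataset that has a value in the new codeVersion field.
                QueryResponse response = solr.query(new SolrQuery("codeVersion:*"));
                System.out.println("Datasets with a codeVersion: " + response.getResults().getNumFound());
            }
        }
    }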
---
 conf/solr/8.11.1/schema.xml | 48 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml
index 63312ab5d40..2656abf0dc5 100644
--- a/conf/solr/8.11.1/schema.xml
+++ b/conf/solr/8.11.1/schema.xml
@@ -405,9 +405,31 @@
+    <!-- [the <field> definitions added here for the CodeMeta 2.0 block
+         (codeVersion, developmentStatus, memoryRequirements, processorRequirements,
+         storageRequirements, permissions, softwareHelp, readme, etc.) did not
+         survive in this copy] -->
-
@@ -645,6 +667,30 @@
+    <!-- [the matching <copyField> entries for the CodeMeta 2.0 fields did not
+         survive in this copy] -->
-
- 5.0.0-RC1
+ 5.0.0-RC2
 1.15.0

From 3d1e98c5a9f5f755d8d78b6151b659fe2377f3ed Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Fri, 2 Dec 2022 13:27:40 -0500
Subject: [PATCH 439/608] this method was renamed in RC2 (#8843)

---
 .../harvest/server/xoai/DataverseXoaiItemRepository.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java
index faf3cf9ddc4..147d42648fa 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java
@@ -49,7 +49,7 @@ public DataverseXoaiItemRepository (OAIRecordServiceBean recordService, DatasetS
     }
 
     @Override
-    public ItemIdentifier getItem(String identifier) throws IdDoesNotExistException {
+    public ItemIdentifier getItemIdentifier(String identifier) throws IdDoesNotExistException {
         // This method is called when ListMetadataFormats request specifies
         // the identifier, requesting the formats available for this specific record.
         // In our case, under the current implementation, we need to simply look

From aeffa3b6fc13a029b70630d856b5f0373a333903 Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Mon, 5 Dec 2022 20:41:24 -0500
Subject: [PATCH 440/608] a few extra oai tests (#8843)

---
 .../iq/dataverse/api/HarvestingServerIT.java | 222 +++++++++++++-----
 .../edu/harvard/iq/dataverse/api/UtilIT.java |  10 +
 2 files changed, 176 insertions(+), 56 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
index fdd034ab12e..5355b57490d 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
@@ -10,7 +10,12 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import com.jayway.restassured.response.Response;
 import com.jayway.restassured.path.json.JsonPath;
+import com.jayway.restassured.path.xml.XmlPath;
+import com.jayway.restassured.path.xml.element.Node;
 import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
 import javax.json.Json;
 import javax.json.JsonArray;
 import static javax.ws.rs.core.Response.Status.FORBIDDEN;
@@ -24,18 +29,32 @@ import static org.junit.Assert.assertTrue;
 
 /**
- * extremely minimal API tests for creating OAI sets.
+ * Tests for the Harvesting Server functionality.
+ * Note that these tests cover BOTH the proprietary Dataverse REST APIs for
+ * creating and managing sets, AND the OAI-PMH functionality itself.
*/ public class HarvestingServerIT { private static final Logger logger = Logger.getLogger(HarvestingServerIT.class.getCanonicalName()); + private static String normalUserAPIKey; + private static String adminUserAPIKey; + private static String singleSetDatasetIdentifier; + private static String singleSetDatasetPersistentId; + @BeforeClass public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); // enable harvesting server // Gave some thought to storing the original response, and resetting afterwards - but that appears to be more complexity than it's worth Response enableHarvestingServerResponse = UtilIT.setSetting(SettingsServiceBean.Key.OAIServerEnabled,"true"); + + // Create users: + setupUsers(); + + // Create and publish some datasets: + setupDatasets(); + } @AfterClass @@ -44,7 +63,7 @@ public static void afterClass() { Response enableHarvestingServerResponse = UtilIT.setSetting(SettingsServiceBean.Key.OAIServerEnabled,"false"); } - private void setupUsers() { + private static void setupUsers() { Response cu0 = UtilIT.createRandomUser(); normalUserAPIKey = UtilIT.getApiTokenFromResponse(cu0); Response cu1 = UtilIT.createRandomUser(); @@ -52,6 +71,40 @@ private void setupUsers() { Response u1a = UtilIT.makeSuperUser(un1); adminUserAPIKey = UtilIT.getApiTokenFromResponse(cu1); } + + private static void setupDatasets() { + // create dataverse: + Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + // publish dataverse: + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, adminUserAPIKey); + assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode()); + + // create dataset: + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey); + createDatasetResponse.prettyPrint(); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + + // retrieve the global id: + singleSetDatasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); + + // publish dataset: + Response publishDataset = UtilIT.publishDatasetViaNativeApi(singleSetDatasetPersistentId, "major", adminUserAPIKey); + assertEquals(200, publishDataset.getStatusCode()); + + singleSetDatasetIdentifier = singleSetDatasetPersistentId.substring(singleSetDatasetPersistentId.lastIndexOf('/') + 1); + + logger.info("identifier: " + singleSetDatasetIdentifier); + + // Publish command is executed asynchronously, i.e. it may + // still be running after we received the OK from the publish API. + // The oaiExport step also requires the metadata exports to be done and this + // takes longer than just publish/reindex. + // So wait for all of this to finish. 
+        UtilIT.sleepForReexport(singleSetDatasetPersistentId, adminUserAPIKey, 10);
+    }
 
     private String jsonForTestSpec(String name, String def) {
         String r = String.format("{\"name\":\"%s\",\"definition\":\"%s\"}", name, def);//description is optional
@@ -63,20 +116,84 @@ private String jsonForEditSpec(String name, String def, String desc) {
         return r;
     }
 
-    private String normalUserAPIKey;
-    private String adminUserAPIKey;
+    private XmlPath validateOaiVerbResponse(Response oaiResponse, String verb) {
+        // confirm that the response is in fact XML:
+        XmlPath responseXmlPath = oaiResponse.getBody().xmlPath();
+        assertNotNull(responseXmlPath);
+
+        String dateString = responseXmlPath.getString("OAI-PMH.responseDate");
+        assertNotNull(dateString); // TODO: validate that it's well-formatted!
+        logger.info("date string from the OAI output:"+dateString);
+        assertEquals("http://localhost:8080/oai", responseXmlPath.getString("OAI-PMH.request"));
+        assertEquals(verb, responseXmlPath.getString("OAI-PMH.request.@verb"));
+        return responseXmlPath;
+    }
+
+    @Test
+    public void testOaiIdentify() {
+        // Run Identify:
+        Response identifyResponse = UtilIT.getOaiIdentify();
+        assertEquals(OK.getStatusCode(), identifyResponse.getStatusCode());
+        //logger.info("Identify response: "+identifyResponse.prettyPrint());
+
+        // Validate the response:
+
+        XmlPath responseXmlPath = validateOaiVerbResponse(identifyResponse, "Identify");
+        assertEquals("http://localhost:8080/oai", responseXmlPath.getString("OAI-PMH.Identify.baseURL"));
+        // Confirm that the server is reporting the correct parameters that
+        // our server implementation should be using:
+        assertEquals("2.0", responseXmlPath.getString("OAI-PMH.Identify.protocolVersion"));
+        assertEquals("transient", responseXmlPath.getString("OAI-PMH.Identify.deletedRecord"));
+        assertEquals("YYYY-MM-DDThh:mm:ssZ", responseXmlPath.getString("OAI-PMH.Identify.granularity"));
+    }
+
+    @Test
+    public void testOaiListMetadataFormats() {
+        // Run ListMetadataFormats:
+        Response listFormatsResponse = UtilIT.getOaiListMetadataFormats();
+        assertEquals(OK.getStatusCode(), listFormatsResponse.getStatusCode());
+        //logger.info("ListMetadataFormats response: "+listFormatsResponse.prettyPrint());
+
+        // Validate the response:
+
+        XmlPath responseXmlPath = validateOaiVerbResponse(listFormatsResponse, "ListMetadataFormats");
+
+        // Check the payload of the response against the list of metadata formats
+        // we are currently offering under OAI; will need to be explicitly
+        // modified if/when we add more harvestable formats.
+
+        List listFormats = responseXmlPath.getList("OAI-PMH.ListMetadataFormats.metadataFormat");
+
+        assertNotNull(listFormats);
+        assertEquals(5, listFormats.size());
+
+        // The metadata formats are reported in an unpredictable order.
We + // want to sort the prefix names for comparison purposes, and for that + // they need to be saved in a modifiable list: + List metadataPrefixes = new ArrayList<>(); + + for (int i = 0; i < listFormats.size(); i++) { + metadataPrefixes.add(responseXmlPath.getString("OAI-PMH.ListMetadataFormats.metadataFormat["+i+"].metadataPrefix")); + } + Collections.sort(metadataPrefixes); + + assertEquals("[Datacite, dataverse_json, oai_datacite, oai_dc, oai_ddi]", metadataPrefixes.toString()); + + } + + @Test - public void testSetCreation() { - setupUsers(); + public void testSetCreateAPIandOAIlistIdentifiers() { + // Create the set with Dataverse /api/harvest/server API: String setName = UtilIT.getRandomString(6); String def = "*"; // make sure the set does not exist - String u0 = String.format("/api/harvest/server/oaisets/%s", setName); + String setPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; Response r0 = given() - .get(u0); + .get(setPath); assertEquals(404, r0.getStatusCode()); // try to create set as normal user, should fail @@ -94,7 +211,7 @@ public void testSetCreation() { assertEquals(201, r2.getStatusCode()); Response getSet = given() - .get(u0); + .get(setPath); logger.info("getSet.getStatusCode(): " + getSet.getStatusCode()); logger.info("getSet printresponse: " + getSet.prettyPrint()); @@ -118,17 +235,19 @@ public void testSetCreation() { Response r4 = UtilIT.exportOaiSet(setName); assertEquals(200, r4.getStatusCode()); - // try to delete as normal user should fail + + + // try to delete as normal user, should fail Response r5 = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) - .delete(u0); + .delete(setPath); logger.info("r5.getStatusCode(): " + r5.getStatusCode()); assertEquals(400, r5.getStatusCode()); - // try to delete as admin user should work + // try to delete as admin user, should work Response r6 = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .delete(u0); + .delete(setPath); logger.info("r6.getStatusCode(): " + r6.getStatusCode()); assertEquals(200, r6.getStatusCode()); @@ -136,7 +255,7 @@ public void testSetCreation() { @Test public void testSetEdit() { - setupUsers(); + //setupUsers(); String setName = UtilIT.getRandomString(6); String def = "*"; @@ -195,46 +314,17 @@ public void testSetEdit() { // OAI set with that one dataset, and attempt to retrieve the OAI record // with GetRecord. 
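    // (The UtilIT.getOai* helpers used in these tests are plain GET requests
    // against the OAI servlet; the GetRecord call below, for example, amounts to
    // /oai?verb=GetRecord&identifier=<persistentId>&metadataPrefix=oai_dc,
    // per the UtilIT methods added further down in this patch.)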
    @Test
-    public void testOaiFunctionality() throws InterruptedException {
+    public void testSingleRecordOaiSet() throws InterruptedException {
 
-        setupUsers();
-
-        // create dataverse:
-        Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey);
-        createDataverseResponse.prettyPrint();
-        String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
+        //setupUsers();
 
-        // publish dataverse:
-        Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, adminUserAPIKey);
-        assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode());
-
-        // create dataset:
-        Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey);
-        createDatasetResponse.prettyPrint();
-        Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse);
-
-        // retrieve the global id:
-        String datasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse);
-
-        // publish dataset:
-        Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", adminUserAPIKey);
-        assertEquals(200, publishDataset.getStatusCode());
-
-        String identifier = datasetPersistentId.substring(datasetPersistentId.lastIndexOf('/') + 1);
-
-        logger.info("identifier: " + identifier);
+
 
-        // Let's try and create an OAI set with the dataset we have just
-        // created and published:
-        // - however, publish command is executed asynchronously, i.e. it may
-        // still be running after we received the OK from the publish API.
-        // The oaiExport step also requires the metadata exports to be done and this
-        // takes longer than just publish/reindex.
-        // So wait for all of this to finish.
-        UtilIT.sleepForReexport(datasetPersistentId, adminUserAPIKey, 10);
+        // Let's try and create an OAI set with the "single set dataset" that
+        // was created as part of the initial setup:
 
-        String setName = identifier;
-        String setQuery = "dsPersistentId:" + identifier;
+        String setName = singleSetDatasetIdentifier;
+        String setQuery = "dsPersistentId:" + singleSetDatasetIdentifier;
         String apiPath = String.format("/api/harvest/server/oaisets/%s", setName);
         String createPath ="/api/harvest/server/oaisets/add";
         Response createSetResponse = given()
@@ -277,12 +367,18 @@ public void testOaiFunctionality() throws InterruptedException {
                 // There should be 1 and only 1 record in the response:
                 assertEquals(1, ret.size());
                 // And the record should be the dataset we have just created:
-                assertEquals(datasetPersistentId, listIdentifiersResponse.getBody().xmlPath()
+                assertEquals(singleSetDatasetPersistentId, listIdentifiersResponse.getBody().xmlPath()
                         .getString("OAI-PMH.ListIdentifiers.header.identifier"));
                 break;
             }
             Thread.sleep(1000L);
-        } while (i<maxWait);
+        } while (i<maxWait);
+        System.out.println("Waited " + i + " seconds for OIA export.");
+        //Fail if we didn't find the exported record before the timeout
+        assertTrue(i < maxWait);
+
+        Response listRecordsResponse = UtilIT.getOaiListRecords(setName, "oai_dc");
+        assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode());
+        List listRecords = listRecordsResponse.getBody().xmlPath().getList("OAI-PMH.ListRecords.record");
+        assertNotNull(listRecords);
+        assertEquals(1, listRecords.size());
+        assertEquals(singleSetDatasetPersistentId, listRecordsResponse.getBody().xmlPath().getString("OAI-PMH.ListRecords.record[0].header.identifier"));
+
+        // assert that Datacite format does not contain the XML prolog
+        // [the Datacite request and its assertFalse(...contains("<?xml...")) check
+        //  did not survive in this copy]
 
         // And now run GetRecord on the OAI record for the dataset:
-        Response getRecordResponse = UtilIT.getOaiRecord(datasetPersistentId, "oai_dc");
-
-        assertEquals(datasetPersistentId, getRecordResponse.getBody().xmlPath().getString("OAI-PMH.GetRecord.record.header.identifier"));
+        Response getRecordResponse = UtilIT.getOaiRecord(singleSetDatasetPersistentId, "oai_dc");
+
+        System.out.println("GetRecord response in its entirety: "+getRecordResponse.getBody().prettyPrint());
+        System.out.println("one more time:");
+        getRecordResponse.prettyPrint();
+
+        assertEquals(singleSetDatasetPersistentId, getRecordResponse.getBody().xmlPath().getString("OAI-PMH.GetRecord.record.header.identifier"));
 
         // TODO:
         // check the actual metadata payload of the OAI record more carefully?
} + + // This test will attempt to create a set with multiple records (enough + // to trigger a paged response with a continuation token) and test its + // performance. + + + @Test + public void testMultiRecordOaiSet() throws InterruptedException { + + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 550d4ed1264..9fa47db167b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2620,6 +2620,16 @@ static Response exportOaiSet(String setName) { return given().put(apiPath); } + static Response getOaiIdentify() { + String oaiVerbPath = "/oai?verb=Identify"; + return given().get(oaiVerbPath); + } + + static Response getOaiListMetadataFormats() { + String oaiVerbPath = "/oai?verb=ListMetadataFormats"; + return given().get(oaiVerbPath); + } + static Response getOaiRecord(String datasetPersistentId, String metadataFormat) { String apiPath = String.format("/oai?verb=GetRecord&identifier=%s&metadataPrefix=%s", datasetPersistentId, metadataFormat); return given().get(apiPath); From 51fc6029c3a905d9f7c3fd5243b64fd4a8b6029e Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 5 Dec 2022 20:43:49 -0500 Subject: [PATCH 441/608] small change in the guide per feedback (#7940) --- doc/sphinx-guides/source/admin/harvestclients.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/harvestclients.rst b/doc/sphinx-guides/source/admin/harvestclients.rst index 6a76f721162..e94a6aa1730 100644 --- a/doc/sphinx-guides/source/admin/harvestclients.rst +++ b/doc/sphinx-guides/source/admin/harvestclients.rst @@ -33,7 +33,7 @@ For example: sudo touch /usr/local/payara5/glassfish/domains/domain1/logs/stopharvest_bigarchive.70916 sudo chown dataverse /usr/local/payara5/glassfish/domains/domain1/logs/stopharvest_bigarchive.70916 -We recommend that stop stop any running harvesting jobs using this mechanism if you need to restart the application server, otherwise the ongoing harvest will be killed, but may be left marked as if it's still in progress in the database. +Note: If the application server is stopped and restarted, any running harvesting jobs will be killed but may remain marked as in progress in the database. We thus recommend using the mechanism here to stop ongoing harvests prior to a server restart. What if a Run Fails? 
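The touch/chown commands shown in the guide snippet above generalize into a small
helper; a minimal sketch, assuming the guide's example payara5 install path, with
the client nickname and the numeric suffix from the "stopharvest_bigarchive.70916"
example as placeholder arguments:

    #!/bin/sh
    # drop the flag file that signals the running harvesting job to stop
    LOGS=/usr/local/payara5/glassfish/domains/domain1/logs
    NICKNAME="$1"   # e.g. bigarchive
    JOBNUM="$2"     # e.g. 70916
    sudo touch "$LOGS/stopharvest_${NICKNAME}.${JOBNUM}"
    sudo chown dataverse "$LOGS/stopharvest_${NICKNAME}.${JOBNUM}"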
From a19021089b46b6ac8051d8df313fd8e622145cb7 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 5 Dec 2022 21:20:16 -0500 Subject: [PATCH 442/608] typo (#7940) --- .../dataverse/harvest/client/HarvestingClientServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java index f2a3483c84f..13cc44ce919 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java @@ -167,7 +167,7 @@ public void deleteClient(Long clientId) { @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void setHarvestSuccess(Long hcId, Date currentTime, int harvestedCount, int failedCount, int deletedCount) { - recordHarvestJobStatus(hcId, currentTime, harvestedCount, failedCount, deletedCount, ClientHarvestRun.RunResultType.INTERRUPTED); + recordHarvestJobStatus(hcId, currentTime, harvestedCount, failedCount, deletedCount, ClientHarvestRun.RunResultType.SUCCESS); } @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) From 4b60983e360b3ee4b5a50535b769852fc9ea67ef Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 6 Dec 2022 09:25:42 +0100 Subject: [PATCH 443/608] refactor(settings): remove unused Config var in SystemConfig #7000 --- src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index fe95f53d293..fc7fd7beb06 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -10,8 +10,6 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; -import org.eclipse.microprofile.config.Config; -import org.eclipse.microprofile.config.ConfigProvider; import org.passay.CharacterRule; import javax.ejb.EJB; @@ -46,7 +44,6 @@ public class SystemConfig { private static final Logger logger = Logger.getLogger(SystemConfig.class.getCanonicalName()); - private static final Config config = ConfigProvider.getConfig(); @EJB SettingsServiceBean settingsService; @@ -133,7 +130,6 @@ public String getVersion(boolean withBuildNumber) { // It will default to read from microprofile-config.properties source, // which contains in the source a Maven property reference to ${project.version}. // When packaging the app to deploy it, Maven will replace this, rendering it a static entry. - // NOTE: MicroProfile Config will cache the entry for us in internal maps. 
String appVersion = JvmSettings.VERSION.lookup(); if (withBuildNumber) { From effd64f5043773ec36bb90ed283293ebb77d1586 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 6 Dec 2022 13:54:52 -0500 Subject: [PATCH 444/608] #3621 update placeholders for schema and namespace --- .../harvest/server/web/servlet/OAIServlet.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 5eacb1addb6..3cfdcc1737d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -96,9 +96,15 @@ public class OAIServlet extends HttpServlet { // be calling ListIdentifiers, and then making direct calls to the export // API of the remote Dataverse, to obtain the records in native json. This // is how we should have implemented this in the first place, really. + /* + SEK + per #3621 we are adding urls to the namespace and schema + These will not resolve presently. the change is so that the + xml produced by https://demo.dataverse.org/oai?verb=ListMetadataFormats will validate + */ private static final String DATAVERSE_EXTENDED_METADATA_FORMAT = "dataverse_json"; - private static final String DATAVERSE_EXTENDED_METADATA_NAMESPACE = "Custom Dataverse metadata in JSON format (Dataverse4 to Dataverse4 harvesting only)"; - private static final String DATAVERSE_EXTENDED_METADATA_SCHEMA = "JSON schema pending"; + private static final String DATAVERSE_EXTENDED_METADATA_NAMESPACE = "https://dataverse.org/schema/core#"; + private static final String DATAVERSE_EXTENDED_METADATA_SCHEMA = "https://dataverse.org/schema/core.xsd"; private Context xoaiContext; private SetRepository setRepository; From 7a244406d36ad5f5a9ad6e01e1e29c149935324c Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 6 Dec 2022 16:02:47 -0500 Subject: [PATCH 445/608] #3621 remove # --- .../iq/dataverse/harvest/server/web/servlet/OAIServlet.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 3cfdcc1737d..f778fd56644 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -103,7 +103,7 @@ public class OAIServlet extends HttpServlet { xml produced by https://demo.dataverse.org/oai?verb=ListMetadataFormats will validate */ private static final String DATAVERSE_EXTENDED_METADATA_FORMAT = "dataverse_json"; - private static final String DATAVERSE_EXTENDED_METADATA_NAMESPACE = "https://dataverse.org/schema/core#"; + private static final String DATAVERSE_EXTENDED_METADATA_NAMESPACE = "https://dataverse.org/schema/core"; private static final String DATAVERSE_EXTENDED_METADATA_SCHEMA = "https://dataverse.org/schema/core.xsd"; private Context xoaiContext; From 8e70d995e8bffb4daa154e86a1e62e2c4f97788e Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 7 Dec 2022 12:32:23 -0500 Subject: [PATCH 446/608] added a release note (#7940) --- doc/release-notes/7940-stop-harvest-in-progress | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 doc/release-notes/7940-stop-harvest-in-progress diff --git 
a/doc/release-notes/7940-stop-harvest-in-progress b/doc/release-notes/7940-stop-harvest-in-progress
new file mode 100644
index 00000000000..cb27a900f15
--- /dev/null
+++ b/doc/release-notes/7940-stop-harvest-in-progress
@@ -0,0 +1,4 @@
+## Mechanism added for stopping a harvest in progress
+
+It is now possible for an admin to stop a long-running harvesting job. See the [Harvesting Clients](https://guides.dataverse.org/en/latest/admin/harvestclients.html) guide for more information.
+

From 7a80d7e69388d0a0a35ee72ea60442f11154b24a Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Wed, 7 Dec 2022 13:24:13 -0500
Subject: [PATCH 447/608] #9211 fix render logic display with TOA OR restricted
 files

---
 src/main/webapp/dataset-license-terms.xhtml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml
index 1cbf297bf89..8172efac67f 100644
--- a/src/main/webapp/dataset-license-terms.xhtml
+++ b/src/main/webapp/dataset-license-terms.xhtml
@@ -236,7 +236,8 @@
    <!-- [xhtml markup missing from this copy: per the subject line, a rendered=
         condition was updated so the section displays when Terms of Access are
         set OR the dataset contains restricted files] -->
Date: Thu, 8 Dec 2022 09:46:17 -0500 Subject: [PATCH 449/608] add docs #8944 --- .../source/admin/metadatacustomization.rst | 8 +++-- doc/sphinx-guides/source/api/native-api.rst | 33 ++++++++++++++++--- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 5f7cf85f714..9fb8626d4c4 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -386,12 +386,16 @@ Metadata Block Setup Now that you understand the TSV format used for metadata blocks, the next step is to attempt to make improvements to existing metadata blocks or create entirely new metadata blocks. For either task, you should have a Dataverse Software development environment set up for testing where you can drop the database frequently while you make edits to TSV files. Once you have tested your TSV files, you should consider making a pull request to contribute your improvement back to the community. +.. _exploring-metadata-blocks: + Exploring Metadata Blocks ~~~~~~~~~~~~~~~~~~~~~~~~~ -In addition to studying the TSV files themselves you might find the following highly experimental and subject-to-change API endpoints useful to understand the metadata blocks that have already been loaded into your Dataverse installation: +In addition to studying the TSV files themselves you will probably find the :ref:`metadata-blocks-api` API helpful in getting a structured dump of metadata blocks in JSON format. + +There are also a few older, highly experimental, and subject-to-change API endpoints under the "admin" API documented below but the public API above is preferred. -You can get a dump of metadata fields (yes, the output is odd, please open a issue) like this: +You can get a dump of metadata fields like this: ``curl http://localhost:8080/api/admin/datasetfield`` diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 87a4d3def58..76ca38fdc70 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -3007,22 +3007,47 @@ The fully expanded example above (without environment variables) looks like this curl https://demo.dataverse.org/api/info/apiTermsOfUse +.. _metadata-blocks-api: + Metadata Blocks --------------- +See also :ref:`exploring-metadata-blocks`. + Show Info About All Metadata Blocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|CORS| Lists brief info about all metadata blocks registered in the system:: +|CORS| Lists brief info about all metadata blocks registered in the system. + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl $SERVER_URL/api/metadatablocks + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash - GET http://$SERVER/api/metadatablocks + curl https://demo.dataverse.org/api/metadatablocks Show Info About Single Metadata Block ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|CORS| Return data about the block whose ``identifier`` is passed. ``identifier`` can either be the block's id, or its name:: +|CORS| Return data about the block whose ``identifier`` is passed, including allowed controlled vocabulary values. ``identifier`` can either be the block's database id, or its name (i.e. "citation"). + +.. 
code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export IDENTIFIER=citation + + curl $SERVER_URL/api/metadatablocks/$IDENTIFIER + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash - GET http://$SERVER/api/metadatablocks/$identifier + curl https://demo.dataverse.org/api/metadatablocks/citation .. _Notifications: From 711dc6362dc629269d7db5840eb13821fc978682 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 9 Dec 2022 10:39:44 -0500 Subject: [PATCH 450/608] extra metadata from NetCDF and HDF5 files in NcML format #9153 --- doc/release-notes/9153-extract-metadata.md | 1 + .../source/user/dataset-management.rst | 7 ++ .../edu/harvard/iq/dataverse/DatasetPage.java | 1 + .../iq/dataverse/EditDatafilesPage.java | 1 + .../datadeposit/MediaResourceManagerImpl.java | 1 + .../datasetutility/AddReplaceFileHelper.java | 2 + .../dataverse/ingest/IngestServiceBean.java | 64 ++++++++++++++++++- .../harvard/iq/dataverse/api/NetcdfIT.java | 57 +++++++++++++++++ 8 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 doc/release-notes/9153-extract-metadata.md create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java diff --git a/doc/release-notes/9153-extract-metadata.md b/doc/release-notes/9153-extract-metadata.md new file mode 100644 index 00000000000..ce4cc714805 --- /dev/null +++ b/doc/release-notes/9153-extract-metadata.md @@ -0,0 +1 @@ +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML (XML) format and save it as an auxiliary file. diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index ec3bb392ce5..e891ca72880 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -299,6 +299,13 @@ Astronomy (FITS) Metadata found in the header section of `Flexible Image Transport System (FITS) files `_ are automatically extracted by the Dataverse Software, aggregated and displayed in the Astronomy Domain-Specific Metadata of the Dataset that the file belongs to. This FITS file metadata, is therefore searchable and browsable (facets) at the Dataset-level. +NetCDF and HDF5 +--------------- + +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML_ (XML) format and save it as an auxiliary file. (See also :doc:`/developers/aux-file-support` in the Developer Guide.) + +.. 
_NcML: https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_overview.html + Compressed Files ---------------- diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 6e71f6c5042..b538aaca2c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3733,6 +3733,7 @@ public String save() { // Call Ingest Service one more time, to // queue the data ingest jobs for asynchronous execution: ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) session.getUser()); + ingestService.extractMetadata(dataset, (AuthenticatedUser) session.getUser()); //After dataset saved, then persist prov json data if(systemConfig.isProvCollectionEnabled()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index fc8df8681af..d045126a3aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -1225,6 +1225,7 @@ public String save() { // queue the data ingest jobs for asynchronous execution: if (mode == FileEditMode.UPLOAD) { ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) session.getUser()); + ingestService.extractMetadata(dataset, (AuthenticatedUser) session.getUser()); } if (FileEditMode.EDIT == mode && Referrer.FILE == referrer && fileMetadatas.size() > 0) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java index 5491024c73c..e8d25bb4148 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java @@ -373,6 +373,7 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au } ingestService.startIngestJobsForDataset(dataset, user); + ingestService.extractMetadata(dataset, user); ReceiptGenerator receiptGenerator = new ReceiptGenerator(); String baseUrl = urlManager.getHostnamePlusBaseUrlPath(uri); diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index febbb249a91..5277d014430 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -1932,6 +1932,7 @@ private boolean step_100_startIngestJobs(){ // start the ingest! 
ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); msg("post ingest start"); + ingestService.extractMetadata(dataset, dvRequest.getAuthenticatedUser()); } return true; } @@ -2145,6 +2146,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } //ingest job ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + ingestService.extractMetadata(dataset, (AuthenticatedUser) authUser); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index b03bae618a4..e261efce642 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -20,6 +20,8 @@ package edu.harvard.iq.dataverse.ingest; +import edu.harvard.iq.dataverse.AuxiliaryFile; +import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.datavariable.VariableCategory; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; @@ -72,6 +74,7 @@ //import edu.harvard.iq.dvn.unf.*; import org.dataverse.unf.*; import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -81,6 +84,7 @@ import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; +import java.nio.charset.StandardCharsets; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; @@ -113,6 +117,9 @@ import javax.jms.QueueSession; import javax.jms.Message; import javax.faces.application.FacesMessage; +import javax.ws.rs.core.MediaType; +import ucar.nc2.NetcdfFile; +import ucar.nc2.NetcdfFiles; /** * @@ -134,6 +141,8 @@ public class IngestServiceBean { @EJB DataFileServiceBean fileService; @EJB + AuxiliaryFileServiceBean auxiliaryFileService; + @EJB SystemConfig systemConfig; @Resource(lookup = "java:app/jms/queue/ingest") @@ -343,6 +352,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, try { // FITS is the only type supported for metadata // extraction, as of now. -- L.A. 4.0 + // Consider adding other formats such as NetCDF/HDF5. dataFile.setContentType("application/fits"); metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); } catch (IOException mex) { @@ -565,7 +575,58 @@ public int compare(DataFile d1, DataFile d2) { return sb.toString(); } - + // Note: There is another method called extractMetadata for FITS files. + public void extractMetadata(Dataset dataset, AuthenticatedUser user) { + for (DataFile dataFile : dataset.getFiles()) { + Path pathToLocalDataFile = null; + try { + pathToLocalDataFile = dataFile.getStorageIO().getFileSystemPath(); + } catch (IOException ex) { + logger.info("Exception calling dataAccess.getFileSystemPath: " + ex); + } + InputStream inputStream = null; + if (pathToLocalDataFile != null) { + try ( NetcdfFile netcdfFile = NetcdfFiles.open(pathToLocalDataFile.toString())) { + if (netcdfFile != null) { + // TODO: What should we pass as a URL to toNcml()? 
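+                    // (NcML is an XML description of the file's structure -
+                    // its dimensions, variables and attributes - rather than
+                    // the data itself; the URL argument is presumably recorded
+                    // as the location attribute of the root element)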
+                    String ncml = netcdfFile.toNcml("FIXME_URL");
+                    inputStream = new ByteArrayInputStream(ncml.getBytes(StandardCharsets.UTF_8));
+                } else {
+                    logger.info("NetcdfFiles.open() could not open file id " + dataFile.getId() + " (null returned).");
+                }
+            } catch (IOException ex) {
+                logger.info("NetcdfFiles.open() could not open file id " + dataFile.getId() + ". Exception caught: " + ex);
+            }
+        } else {
+            logger.info("pathToLocalDataFile is null! Are you on S3? Metadata extraction from NetCDF/HDF5 is not yet available.");
+            // As with tabular files, we'll probably need to download the NetCDF/HDF5 files from S3 and then try to extract the metadata,
+            // unless we can get some sort of S3 interface working:
+            // https://docs.unidata.ucar.edu/netcdf-java/current/userguide/dataset_urls.html#object-stores
+            // If we need to download the file and extract only some of the bytes (hopefully the first bytes) here's the spec for NetCDF:
+            // https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html
+        }
+        if (inputStream != null) {
+            // TODO: What should the tag be?
+            String formatTag = "ncml";
+            // TODO: What should the version be?
+            String formatVersion = "0.1";
+            // TODO: What should the origin be?
+            String origin = "myOrigin";
+            boolean isPublic = true;
+            // TODO: What should the type be?
+            String type = "myType";
+            // TODO: Does NcML have its own content type? (MIME type)
+            MediaType mediaType = new MediaType("text", "xml");
+            try {
+                AuxiliaryFile auxFile = auxiliaryFileService.processAuxiliaryFile(inputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType);
+                logger.info("Aux file extracted from NetCDF/HDF5 file saved: " + auxFile);
+            } catch (Exception ex) {
+                logger.info("exception thrown calling processAuxiliaryFile: " + ex);
+            }
+        }
+    }
+    }
+
     public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException {
 /*
         logger.info("Skipping summary statistics and UNF.");
@@ -1159,6 +1220,7 @@ public boolean fileMetadataExtractable(DataFile dataFile) {
      * extractMetadata:
      * framework for extracting metadata from uploaded files. The results will
      * be used to populate the metadata of the Dataset to which the file belongs.
+     * Note that another method called extractMetadata creates aux files from data files.
*/
     public boolean extractMetadata(String tempFileLocation, DataFile dataFile, DatasetVersion editVersion) throws IOException {
         boolean ingestSuccessful = false;
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java
new file mode 100644
index 00000000000..a83af514935
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java
@@ -0,0 +1,57 @@
+package edu.harvard.iq.dataverse.api;
+
+import com.jayway.restassured.RestAssured;
+import com.jayway.restassured.path.json.JsonPath;
+import com.jayway.restassured.response.Response;
+import java.io.IOException;
+import static javax.ws.rs.core.Response.Status.CREATED;
+import static javax.ws.rs.core.Response.Status.OK;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class NetcdfIT {
+
+    @BeforeClass
+    public static void setUp() {
+        RestAssured.baseURI = UtilIT.getRestAssuredBaseUri();
+    }
+
+    @Test
+    public void testNcmlFromNetcdf() throws IOException {
+        Response createUser = UtilIT.createRandomUser();
+        createUser.then().assertThat().statusCode(OK.getStatusCode());
+        String apiToken = UtilIT.getApiTokenFromResponse(createUser);
+        String username = UtilIT.getUsernameFromResponse(createUser);
+
+        Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
+        createDataverseResponse.prettyPrint();
+        createDataverseResponse.then().assertThat()
+                .statusCode(CREATED.getStatusCode());
+
+        String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
+
+        Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
+        createDataset.prettyPrint();
+        createDataset.then().assertThat()
+                .statusCode(CREATED.getStatusCode());
+
+        Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset);
+        String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset);
+
+        String pathToFile = "src/test/resources/netcdf/madis-raob";
+
+        Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken);
+        uploadFile.prettyPrint();
+        uploadFile.then().assertThat().statusCode(OK.getStatusCode());
+
+        long fileId = JsonPath.from(uploadFile.body().asString()).getLong("data.files[0].dataFile.id");
+        String tag = "ncml";
+        String version = "0.1";
+
+        Response downloadNcml = UtilIT.downloadAuxFile(fileId, tag, version, apiToken);
+        //downloadNcml.prettyPrint(); // long output
+        downloadNcml.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .contentType("text/xml; name=\"madis-raob.ncml_0.1.xml\";charset=UTF-8");
+    }
+}

From c4f07f91446eedeee611a75537b3b90872817d0b Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Fri, 9 Dec 2022 17:57:29 -0500
Subject: [PATCH 451/608] more tests for the OAI server functionality (#8843)

---
 .../iq/dataverse/api/HarvestingServerIT.java | 349 ++++++++++++------
 .../edu/harvard/iq/dataverse/api/UtilIT.java |   5 +
 2 files changed, 243 insertions(+), 111 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
index 5355b57490d..d25ffd225d9 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
@@ -9,24 +9,18 @@ import org.junit.Test;
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import com.jayway.restassured.response.Response;
-import com.jayway.restassured.path.json.JsonPath;
 import
com.jayway.restassured.path.xml.XmlPath; import com.jayway.restassured.path.xml.element.Node; -import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; -import javax.json.Json; -import javax.json.JsonArray; -import static javax.ws.rs.core.Response.Status.FORBIDDEN; import static javax.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; -import org.junit.Ignore; import java.util.List; -import static junit.framework.Assert.assertEquals; +//import static junit.framework.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertEquals; /** * Tests for the Harvesting Server functionality @@ -184,142 +178,204 @@ public void testOaiListMetadataFormats() { @Test - public void testSetCreateAPIandOAIlistIdentifiers() { - // Create the set with Dataverse /api/harvest/server API: + public void testNativeSetAPI() { String setName = UtilIT.getRandomString(6); String def = "*"; - - // make sure the set does not exist + + // This test focuses on the Create/List/Edit functionality of the + // Dataverse OAI Sets API (/api/harvest/server): + + // API Test 1. Make sure the set does not exist yet String setPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; - Response r0 = given() + Response getSetResponse = given() .get(setPath); - assertEquals(404, r0.getStatusCode()); + assertEquals(404, getSetResponse.getStatusCode()); - // try to create set as normal user, should fail - Response r1 = given() + // API Test 2. Try to create set as normal user, should fail + Response createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .body(jsonForTestSpec(setName, def)) .post(createPath); - assertEquals(400, r1.getStatusCode()); + assertEquals(400, createSetResponse.getStatusCode()); - // try to create set as admin user, should succeed - Response r2 = given() + // API Test 3. Try to create set as admin user, should succeed + createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(jsonForTestSpec(setName, def)) .post(createPath); - assertEquals(201, r2.getStatusCode()); + assertEquals(201, createSetResponse.getStatusCode()); - Response getSet = given() - .get(setPath); + // API Test 4. Retrieve the set we've just created, validate the response + getSetResponse = given().get(setPath); - logger.info("getSet.getStatusCode(): " + getSet.getStatusCode()); - logger.info("getSet printresponse: " + getSet.prettyPrint()); - assertEquals(200, getSet.getStatusCode()); + System.out.println("getSetResponse.getStatusCode(): " + getSetResponse.getStatusCode()); + System.out.println("getSetResponse, full: " + getSetResponse.prettyPrint()); + assertEquals(200, getSetResponse.getStatusCode()); + + getSetResponse.then().assertThat() + .body("status", equalTo(AbstractApiBean.STATUS_OK)) + .body("data.definition", equalTo("*")) + .body("data.description", equalTo("")) + .body("data.name", equalTo(setName)); + + // API Test 5. 
Retrieve all sets, check that our new set is listed
         Response responseAll = given()
                 .get("/api/harvest/server/oaisets");
 
-        logger.info("responseAll.getStatusCode(): " + responseAll.getStatusCode());
-        logger.info("responseAll printresponse: " + responseAll.prettyPrint());
+        System.out.println("responseAll.getStatusCode(): " + responseAll.getStatusCode());
+        System.out.println("responseAll full: " + responseAll.prettyPrint());
         assertEquals(200, responseAll.getStatusCode());
-
-        // try to create set with same name as admin user, should fail
-        Response r3 = given()
+        assertTrue(responseAll.body().jsonPath().getList("data.oaisets").size() > 0);
+        assertTrue(responseAll.body().jsonPath().getList("data.oaisets.name").toString().contains(setName)); // todo: simplify
+
+        // API Test 6. Try to create a set with the same name, should fail
+        createSetResponse = given()
                 .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
                 .body(jsonForTestSpec(setName, def))
                 .post(createPath);
-        assertEquals(400, r3.getStatusCode());
+        assertEquals(400, createSetResponse.getStatusCode());
 
-        // try to export set as admin user, should succeed (under admin API, not checking that normal user will fail)
+        // API Test 7. Try to export set as admin user, should succeed. Set export
+        // is under /api/admin, no need to try to access it as a non-admin user
         Response r4 = UtilIT.exportOaiSet(setName);
         assertEquals(200, r4.getStatusCode());
-
-        // try to delete as normal user, should fail
-        Response r5 = given()
+
+        // API Test 8. Try to delete the set as normal user, should fail
+        Response deleteResponse = given()
                 .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey)
                 .delete(setPath);
-        logger.info("r5.getStatusCode(): " + r5.getStatusCode());
-        assertEquals(400, r5.getStatusCode());
+        logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode());
+        assertEquals(400, deleteResponse.getStatusCode());
 
-        // try to delete as admin user, should work
-        Response r6 = given()
+        // API Test 9. Delete as admin user, should work
+        deleteResponse = given()
                 .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
                 .delete(setPath);
-        logger.info("r6.getStatusCode(): " + r6.getStatusCode());
-        assertEquals(200, r6.getStatusCode());
+        logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode());
+        assertEquals(200, deleteResponse.getStatusCode());
 
     }
 
     @Test
-    public void testSetEdit() {
-        //setupUsers();
+    public void testSetEditAPIandOAIlistSets() {
+        // This test focuses on testing the Edit functionality of the Dataverse
+        // OAI Set API and the ListSets method of the Dataverse OAI server.
+
+        // Initial setup: create a test set.
+        // Since the Create and List (POST and GET) functionality of the API
+        // is tested extensively in the previous test, we will not be paying
+        // as much attention to these methods, aside from confirming the
+        // expected HTTP result codes.
+ String setName = UtilIT.getRandomString(6); - String def = "*"; + String setDef = "*"; - // make sure the set does not exist - String u0 = String.format("/api/harvest/server/oaisets/%s", setName); + // Make sure the set does not exist + String setPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; - Response r0 = given() - .get(u0); - assertEquals(404, r0.getStatusCode()); + Response getSetResponse = given() + .get(setPath); + assertEquals(404, getSetResponse.getStatusCode()); - // try to create set as admin user, should succeed - Response r1 = given() + // Create the set as admin user + Response createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .body(jsonForTestSpec(setName, def)) + .body(jsonForTestSpec(setName, setDef)) .post(createPath); - assertEquals(201, r1.getStatusCode()); + assertEquals(201, createSetResponse.getStatusCode()); + // I. Test the Modify/Edit (POST method) functionality of the + // Dataverse OAI Sets API - // try to edit as normal user should fail - Response r2 = given() + String newDefinition = "title:New"; + String newDescription = "updated"; + + // API Test 1. Try to modify the set as normal user, should fail + Response editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) - .body(jsonForEditSpec(setName, def,"")) - .put(u0); - logger.info("r2.getStatusCode(): " + r2.getStatusCode()); - assertEquals(400, r2.getStatusCode()); + .body(jsonForEditSpec(setName, setDef, "")) + .put(setPath); + logger.info("non-admin user editSetResponse.getStatusCode(): " + editSetResponse.getStatusCode()); + assertEquals(400, editSetResponse.getStatusCode()); - // try to edit as with blanks should fail - Response r3 = given() + // API Test 2. Try to modify as admin, but with invalid (empty) values, + // should fail + editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(jsonForEditSpec(setName, "","")) - .put(u0); - logger.info("r3.getStatusCode(): " + r3.getStatusCode()); - assertEquals(400, r3.getStatusCode()); + .put(setPath); + logger.info("invalid values editSetResponse.getStatusCode(): " + editSetResponse.getStatusCode()); + assertEquals(400, editSetResponse.getStatusCode()); - // try to edit as with something should pass - Response r4 = given() + // API Test 3. Try to modify as admin, with sensible values + editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .body(jsonForEditSpec(setName, "newDef","newDesc")) - .put(u0); - logger.info("r4 Status code: " + r4.getStatusCode()); - logger.info("r4.prettyPrint(): " + r4.prettyPrint()); - assertEquals(OK.getStatusCode(), r4.getStatusCode()); - - logger.info("u0: " + u0); - // now delete it... - Response r6 = given() + .body(jsonForEditSpec(setName, newDefinition, newDescription)) + .put(setPath); + logger.info("admin user editSetResponse status code: " + editSetResponse.getStatusCode()); + logger.info("admin user editSetResponse.prettyPrint(): " + editSetResponse.prettyPrint()); + assertEquals(OK.getStatusCode(), editSetResponse.getStatusCode()); + + // API Test 4. 
List the set, confirm that the new values are shown + getSetResponse = given().get(setPath); + + System.out.println("getSetResponse.getStatusCode(): " + getSetResponse.getStatusCode()); + System.out.println("getSetResponse, full: " + getSetResponse.prettyPrint()); + assertEquals(200, getSetResponse.getStatusCode()); + + getSetResponse.then().assertThat() + .body("status", equalTo(AbstractApiBean.STATUS_OK)) + .body("data.definition", equalTo(newDefinition)) + .body("data.description", equalTo(newDescription)) + .body("data.name", equalTo(setName)); + + // II. Test the ListSets functionality of the OAI server + + Response listSetsResponse = UtilIT.getOaiListSets(); + + // 1. Validate the service section of the OAI response: + + XmlPath responseXmlPath = validateOaiVerbResponse(listSetsResponse, "ListSets"); + + // 2. Validate the payload of the response, by confirming that the set + // we created and modified, above, is being listed by the OAI server + // and its xml record is properly formatted + + List listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list()"); // TODO - maybe try it with findAll()? + assertNotNull(listSets); + assertTrue(listSets.size() > 0); + + Node foundSetNode = null; + for (Node setNode : listSets) { + + if (setName.equals(setNode.get("setName").toString())) { + foundSetNode = setNode; + break; + } + } + + assertNotNull("Newly-created set is not listed by the OAI server", foundSetNode); + assertEquals("Incorrect description in the ListSets entry", newDescription, foundSetNode.getPath("setDescription.metadata.element.field", String.class)); + + // ok, the xml record looks good! + + // Cleanup. Delete the set with the DELETE API + Response deleteSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .delete(u0); - logger.info("r6.getStatusCode(): " + r6.getStatusCode()); - assertEquals(200, r6.getStatusCode()); + .delete(setPath); + assertEquals(200, deleteSetResponse.getStatusCode()); } - // A more elaborate test - we'll create and publish a dataset, then create an - // OAI set with that one dataset, and attempt to retrieve the OAI record - // with GetRecord. + // A more elaborate test - we will create and export an + // OAI set with a single dataset, and attempt to retrieve + // it and validate the OAI server responses of the corresponding + // ListIdentifiers, ListRecords and GetRecord methods. @Test public void testSingleRecordOaiSet() throws InterruptedException { - - //setupUsers(); - - - // Let's try and create an OAI set with the "single set dataset" that // was created as part of the initial setup: @@ -333,12 +389,18 @@ public void testSingleRecordOaiSet() throws InterruptedException { .post(createPath); assertEquals(201, createSetResponse.getStatusCode()); - // TODO: a) look up the set via native harvest/server api; - // b) look up the set via the OAI ListSets; - // export set: - // (this is asynchronous - so we should probably wait a little) + // The GET method of the oai set API, as well as the OAI ListSets + // method are tested extensively in another method in this class, so + // we'll skip checking those here. + + // Let's export the set. This is asynchronous - so we will try to + // wait a little - but in practice, everything potentially time-consuming + // must have been done when the dataset was exported, in the setup method. 
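+        // (For reference, each record header in the ListIdentifiers response
+        // validated below is expected to look roughly like this; the values
+        // shown are illustrative only:
+        //     <header>
+        //       <identifier>doi:10.5072/FK2/XXXXXX</identifier>
+        //       <datestamp>2022-12-05T20:41:24Z</datestamp>
+        //       <setSpec>FK2XXXXX</setSpec>
+        //     </header>)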
+ Response exportSetResponse = UtilIT.exportOaiSet(setName); assertEquals(200, exportSetResponse.getStatusCode()); + Thread.sleep(1000L); + Response getSet = given() .get(apiPath); @@ -350,25 +412,38 @@ public void testSingleRecordOaiSet() throws InterruptedException { do { - // Run ListIdentifiers on this newly-created set: + // OAI Test 1. Run ListIdentifiers on this newly-created set: Response listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); - List ret = listIdentifiersResponse.getBody().xmlPath().getList("OAI-PMH.ListIdentifiers.header"); - assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + XmlPath responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + List ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header"); assertNotNull(ret); - logger.info("setName: " + setName); + if (logger.isLoggable(Level.FINE)) { logger.info("listIdentifiersResponse.prettyPrint:..... "); listIdentifiersResponse.prettyPrint(); } - if (ret.size() != 1) { + if (ret.isEmpty()) { + // OK, we'll sleep for another second - provided it's been less + // than 10 sec. total. i++; } else { - // There should be 1 and only 1 record in the response: + // Validate the payload of the ListRecords response: + // a) There should be 1 and only 1 record in the response: assertEquals(1, ret.size()); - // And the record should be the dataset we have just created: - assertEquals(singleSetDatasetPersistentId, listIdentifiersResponse.getBody().xmlPath() + // b) The one record in it should be the dataset we have just created: + assertEquals(singleSetDatasetPersistentId, responseXmlPath .getString("OAI-PMH.ListIdentifiers.header.identifier")); + assertEquals(setName, responseXmlPath + .getString("OAI-PMH.ListIdentifiers.header.setSpec")); + assertNotNull(responseXmlPath.getString("OAI-PMH.ListIdentifiers.header.dateStamp")); + // TODO: validate the formatting of the date string in the record + // header, above! + + // ok, ListIdentifiers response looks valid. break; } Thread.sleep(1000L); @@ -379,34 +454,86 @@ public void testSingleRecordOaiSet() throws InterruptedException { // already happened during its publishing (we made sure to wait there). // Exporting the set should not take any time - but I'll keep that code // in place since it's not going to hurt. - L.A. + System.out.println("Waited " + i + " seconds for OIA export."); //Fail if we didn't find the exported record before the timeout assertTrue(i < maxWait); + + + // OAI Test 2. 
+        Response listRecordsResponse = UtilIT.getOaiListRecords(setName, "oai_dc");
         assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode());
-        List listRecords = listRecordsResponse.getBody().xmlPath().getList("OAI-PMH.ListRecords.record");
+
+        // Validate the service section of the OAI response:
+
+        XmlPath responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords");
+
+        // Validate the payload of the response:
+        // (the header portion must be identical to that of ListIdentifiers above,
+        // plus the response must contain a metadata section with a valid oai_dc
+        // record)
+
+        List listRecords = responseXmlPath.getList("OAI-PMH.ListRecords.record");
+        // Same deal, there must be 1 record only in the set:
         assertNotNull(listRecords);
         assertEquals(1, listRecords.size());
-        assertEquals(singleSetDatasetPersistentId, listRecordsResponse.getBody().xmlPath().getString("OAI-PMH.ListRecords.record[0].header.identifier"));
-
-        // assert that Datacite format does not contain the XML prolog
+        // a) header section:
+        assertEquals(singleSetDatasetPersistentId, responseXmlPath.getString("OAI-PMH.ListRecords.record.header.identifier"));
+        assertEquals(setName, responseXmlPath
+                .getString("OAI-PMH.ListRecords.record.header.setSpec"));
+        assertNotNull(responseXmlPath.getString("OAI-PMH.ListRecords.record.header.dateStamp"));
+        // b) metadata section:
+        // in the metadata section we are showing the resolver url form of the doi:
+        String persistentIdUrl = singleSetDatasetPersistentId.replace("doi:", "https://doi.org/");
+        assertEquals(persistentIdUrl, responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.identifier"));
+        assertEquals("Darwin's Finches", responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.title"));
+        assertEquals("Finch, Fiona", responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.creator"));
+        assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.",
+                responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.description"));
+        assertEquals("Medicine, Health and Life Sciences",
+                responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.subject"));
+        // ok, looks legit!
+
+        // OAI Test 3.
+        // Assert that Datacite format does not contain the XML prolog
+        // (this is a reference to a resolved issue; generally, harvestable XML
+        // exports must NOT contain the ""));
-        // And now run GetRecord on the OAI record for the dataset:
-        Response getRecordResponse = UtilIT.getOaiRecord(singleSetDatasetPersistentId, "oai_dc");
+        // OAI Test 4. run and validate GetRecord response
+        Response getRecordResponse = UtilIT.getOaiRecord(singleSetDatasetPersistentId, "oai_dc");
         System.out.println("GetRecord response in its entirety: "+getRecordResponse.getBody().prettyPrint());
-        System.out.println("one more time:");
-        getRecordResponse.prettyPrint();
+
+        // Validate the service section of the OAI response:
+        responseXmlPath = validateOaiVerbResponse(getRecordResponse, "GetRecord");
+
+        // Validate the payload of the response:
+
+        // Note that for a set with a single record the output of ListRecords is
+        // essentially identical to that of GetRecord!
+ // (we'll test a multi-record set in a different method) + // a) header section: + assertEquals(singleSetDatasetPersistentId, responseXmlPath.getString("OAI-PMH.GetRecord.record.header.identifier")); + assertEquals(setName, responseXmlPath + .getString("OAI-PMH.GetRecord.record.header.setSpec")); + assertNotNull(responseXmlPath.getString("OAI-PMH.GetRecord.record.header.dateStamp")); + // b) metadata section: + assertEquals(persistentIdUrl, responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.identifier")); + assertEquals("Darwin's Finches", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.title")); + assertEquals("Finch, Fiona", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.creator")); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", + responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.description")); + assertEquals("Medicine, Health and Life Sciences", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.subject")); - assertEquals(singleSetDatasetPersistentId, getRecordResponse.getBody().xmlPath().getString("OAI-PMH.GetRecord.record.header.identifier")); + // ok, looks legit! - // TODO: - // check the actual metadata payload of the OAI record more carefully? } // This test will attempt to create a set with multiple records (enough diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9fa47db167b..ac767279bd4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2630,6 +2630,11 @@ static Response getOaiListMetadataFormats() { return given().get(oaiVerbPath); } + static Response getOaiListSets() { + String oaiVerbPath = "/oai?verb=ListSets"; + return given().get(oaiVerbPath); + } + static Response getOaiRecord(String datasetPersistentId, String metadataFormat) { String apiPath = String.format("/oai?verb=GetRecord&identifier=%s&metadataPrefix=%s", datasetPersistentId, metadataFormat); return given().get(apiPath); From 9cbfa31d4489ed4ce6df6e37a0fecf92f3a77d18 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 12 Dec 2022 13:51:58 -0500 Subject: [PATCH 452/608] extra (extra tedious) server tests validating paging (resumptionToken) functionality of ListIdentifiers and ListRecords (#8843) --- .../iq/dataverse/api/HarvestingServerIT.java | 340 +++++++++++++++++- .../edu/harvard/iq/dataverse/api/UtilIT.java | 18 +- 2 files changed, 351 insertions(+), 7 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index d25ffd225d9..3497c71e169 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -16,6 +16,8 @@ import static javax.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; import java.util.List; +import java.util.Set; +import java.util.HashSet; //import static junit.framework.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -35,6 +37,7 @@ public class HarvestingServerIT { private static String adminUserAPIKey; private static String singleSetDatasetIdentifier; private static String singleSetDatasetPersistentId; + private static List extraDatasetsIdentifiers = new ArrayList<>(); @BeforeClass 
public static void setUpClass() {
@@ -98,6 +101,28 @@ private static void setupDatasets() {
         // takes longer than just publish/reindex.
         // So wait for all of this to finish.
         UtilIT.sleepForReexport(singleSetDatasetPersistentId, adminUserAPIKey, 10);
+
+        // ... And let's create 4 more datasets for a multi-dataset experiment:
+
+        for (int i = 0; i < 4; i++) {
+            // create dataset:
+            createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey);
+            createDatasetResponse.prettyPrint();
+            datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse);
+
+            // retrieve the global id:
+            String thisDatasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse);
+
+            // publish dataset:
+            publishDataset = UtilIT.publishDatasetViaNativeApi(thisDatasetPersistentId, "major", adminUserAPIKey);
+            assertEquals(200, publishDataset.getStatusCode());
+
+            UtilIT.sleepForReexport(thisDatasetPersistentId, adminUserAPIKey, 10);
+
+            extraDatasetsIdentifiers.add(thisDatasetPersistentId.substring(thisDatasetPersistentId.lastIndexOf('/') + 1));
+        }
+
     }
 
     private String jsonForTestSpec(String name, String def) {
@@ -423,16 +448,16 @@ public void testSingleRecordOaiSet() throws InterruptedException {
         assertNotNull(ret);
 
         if (logger.isLoggable(Level.FINE)) {
-            logger.info("listIdentifiersResponse.prettyPrint:..... ");
-            listIdentifiersResponse.prettyPrint();
+            logger.info("listIdentifiersResponse.prettyPrint: "
+                    + listIdentifiersResponse.prettyPrint());
         }
         if (ret.isEmpty()) {
             // OK, we'll sleep for another second - provided it's been less
             // than 10 sec. total.
             i++;
         } else {
-            // Validate the payload of the ListRecords response:
-            // a) There should be 1 and only 1 record in the response:
+            // Validate the payload of the ListIdentifiers response:
+            // a) There should be 1 and only 1 item listed:
             assertEquals(1, ret.size());
             // b) The one record in it should be the dataset we have just created:
             assertEquals(singleSetDatasetPersistentId, responseXmlPath
@@ -537,12 +562,315 @@ public void testSingleRecordOaiSet() throws InterruptedException {
     }
 
     // This test will attempt to create a set with multiple records (enough
-    // to trigger a paged response with a continuation token) and test its
-    // performance.
+    // to trigger a paged response) and test the resumption token functionality.
+    // Note that this test requires the OAI service to be configured with some
+    // non-default settings (the paging limits for ListIdentifiers and ListRecords
+    // must be set to something low, like 2).
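+    // (For reference: the paging state travels in the standard OAI-PMH
+    // resumptionToken element, e.g.:
+    //     <resumptionToken completeListSize="5" cursor="2">some-opaque-token</resumptionToken>
+    // The option names for lowering the paging limits are an assumption here -
+    // something along the lines of:
+    //     ./asadmin create-jvm-options "-Ddataverse.oai.server.maxidentifiers=2"
+    //     ./asadmin create-jvm-options "-Ddataverse.oai.server.maxrecords=2"
+    // - check the Installation Guide for the exact names.)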
@Test public void testMultiRecordOaiSet() throws InterruptedException { + // Setup: Let's create a control OAI set with the 5 datasets created + // in the class init: + + String setName = UtilIT.getRandomString(6); + String setQuery = "(dsPersistentId:" + singleSetDatasetIdentifier; + for (String persistentId : extraDatasetsIdentifiers) { + setQuery = setQuery.concat(" OR dsPersistentId:" + persistentId); + } + setQuery = setQuery.concat(")"); + + String createPath = "/api/harvest/server/oaisets/add"; + + Response createSetResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .body(jsonForTestSpec(setName, setQuery)) + .post(createPath); + assertEquals(201, createSetResponse.getStatusCode()); + + // Dataverse OAI Sets API is tested extensively in other methods here, + // so no need to test in any more details than confirming the OK result + // above + Response exportSetResponse = UtilIT.exportOaiSet(setName); + assertEquals(200, exportSetResponse.getStatusCode()); + Thread.sleep(1000L); + + // OAI Test 1. Run ListIdentifiers on the set we've just created: + Response listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + XmlPath responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + List ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: "+listIdentifiersResponse.prettyPrint()); + } + + // Validate the payload of the ListIdentifiers response: + // 1a) There should be 2 items listed: + assertEquals("Wrong number of items on the first ListIdentifiers page", + 2, ret.size()); + + // 1b) The response contains a resumptionToken for the next page of items: + String resumptionToken = responseXmlPath.getString("OAI-PMH.ListIdentifiers.resumptionToken"); + assertNotNull("No resumption token in the ListIdentifiers response", resumptionToken); + + // 1c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@completeListSize")); + + // 1d) ... and the offset (cursor) is at the right position (0): + assertEquals(0, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@cursor")); + + // The formatting of individual item records in the ListIdentifiers response + // is tested extensively in the previous test method, so we are not + // looking at them in such detail here; but we should record the + // identifiers listed, so that we can confirm that all the set is + // served as expected. + + Set persistentIdsInListIdentifiers = new HashSet<>(); + + for (String persistentId : ret) { + persistentIdsInListIdentifiers.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // ok, let's move on to the next ListIdentifiers page: + // (we repeat the exact same checks as the above; minus the different + // expected offset) + + // OAI Test 2. 
Run ListIdentifiers with the resumptionToken obtained + // in the previous step: + + listIdentifiersResponse = UtilIT.getOaiListIdentifiersWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: "+listIdentifiersResponse.prettyPrint()); + } + + // Validate the payload of the ListIdentifiers response: + // 2a) There should still be 2 items listed: + assertEquals("Wrong number of items on the second ListIdentifiers page", + 2, ret.size()); + + // 2b) The response should contain a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListIdentifiers.resumptionToken"); + assertNotNull("No resumption token in the ListIdentifiers response", resumptionToken); + + // 2c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@completeListSize")); + + // 2d) ... and the offset (cursor) is at the right position (2): + assertEquals(2, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@cursor")); + + // Record the identifiers listed on this results page: + + for (String persistentId : ret) { + persistentIdsInListIdentifiers.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // And now the next and the final ListIdentifiers page. + // This time around we should get an *empty* resumptionToken (indicating + // that there are no more results): + + // OAI Test 3. Run ListIdentifiers with the final resumptionToken + + listIdentifiersResponse = UtilIT.getOaiListIdentifiersWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: "+listIdentifiersResponse.prettyPrint()); + } + + // Validate the payload of the ListIdentifiers response: + // 3a) There should be only 1 item listed: + assertEquals("Wrong number of items on the final ListIdentifiers page", + 1, ret.size()); + + // 3b) The response contains a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListIdentifiers.resumptionToken"); + assertNotNull("No resumption token in the final ListIdentifiers response", resumptionToken); + assertTrue("Non-empty resumption token in the final ListIdentifiers response", "".equals(resumptionToken)); + + // 3c) The total number of items in the set (5) is still listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@completeListSize")); + + // 3d) ... 
and the offset (cursor) is at the right position (4):
+        assertEquals(4, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@cursor"));
+        // Record the last identifier listed on this final page:
+        persistentIdsInListIdentifiers.add(ret.get(0).substring(ret.get(0).lastIndexOf('/') + 1));
+
+        // Finally, let's confirm that the expected 5 datasets have been listed
+        // as part of this Set:
+
+        boolean allDatasetsListed = true;
+
+        allDatasetsListed = persistentIdsInListIdentifiers.contains(singleSetDatasetIdentifier);
+        for (String persistentId : extraDatasetsIdentifiers) {
+            allDatasetsListed = allDatasetsListed && persistentIdsInListIdentifiers.contains(persistentId);
+        }
+
+        assertTrue("Control datasets not properly listed in the paged ListIdentifiers response",
+                allDatasetsListed);
+
+        // OK, it is safe to assume ListIdentifiers works as it should in page mode.
+
+        // We will now repeat the exact same tests for ListRecords (again, no
+        // need to pay close attention to the formatting of the individual records,
+        // since that's tested in the previous test method; our focus here is
+        // testing the paging/resumptionToken functionality)
+
+        // OAI Test 4. Run ListRecords on the set we've just created:
+        Response listRecordsResponse = UtilIT.getOaiListRecords(setName, "oai_dc");
+        assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode());
+
+        // Validate the service section of the OAI response:
+        responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords");
+
+        ret = responseXmlPath.getList("OAI-PMH.ListRecords.record.header.identifier");
+        assertNotNull(ret);
+
+        if (logger.isLoggable(Level.FINE)) {
+            logger.info("listRecordsResponse.prettyPrint: "+listRecordsResponse.prettyPrint());
+        }
+
+        // Validate the payload of the ListRecords response:
+        // 4a) There should be 2 items listed:
+        assertEquals("Wrong number of items on the first ListRecords page",
+                2, ret.size());
+
+        // 4b) The response contains a resumptionToken for the next page of items:
+        resumptionToken = responseXmlPath.getString("OAI-PMH.ListRecords.resumptionToken");
+        assertNotNull("No resumption token in the ListRecords response", resumptionToken);
+
+        // 4c) The total number of items in the set (5) is listed correctly:
+        assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@completeListSize"));
+
+        // 4d) ... and the offset (cursor) is at the right position (0):
+        assertEquals(0, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@cursor"));
+
+        Set<String> persistentIdsInListRecords = new HashSet<>();
+
+        for (String persistentId : ret) {
+            persistentIdsInListRecords.add(persistentId.substring(persistentId.lastIndexOf('/') + 1));
+        }
+
+        // ok, let's move on to the next ListRecords page:
+        // (we repeat the exact same checks as the above; minus the different
+        // expected offset)
+
+        // OAI Test 5. Run ListRecords with the resumptionToken obtained
+        // in the previous step:
+
+        listRecordsResponse = UtilIT.getOaiListRecordsWithResumptionToken(resumptionToken);
+        assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode());
+
+        // Validate the service section of the OAI response:
+        responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords");
+
+        ret = responseXmlPath.getList("OAI-PMH.ListRecords.record.header.identifier");
+        assertNotNull(ret);
+
+        if (logger.isLoggable(Level.FINE)) {
+            logger.info("listRecordsResponse.prettyPrint: "+listRecordsResponse.prettyPrint());
+        }
+
+        // Validate the payload of the ListRecords response:
+        // 5a) There should still be 2 items listed:
+        assertEquals("Wrong number of items on the second ListRecords page",
+                2, ret.size());
+
+        // 5b) The response should contain a resumptionToken for the next page of items:
+        resumptionToken = responseXmlPath.getString("OAI-PMH.ListRecords.resumptionToken");
+        assertNotNull("No resumption token in the ListRecords response", resumptionToken);
+
+        // 5c) The total number of items in the set (5) is listed correctly:
+        assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@completeListSize"));
+
+        // 5d) ... and the offset (cursor) is at the right position (2):
+        assertEquals(2, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@cursor"));
+
+        // Record the identifiers listed on this results page:
+
+        for (String persistentId : ret) {
+            persistentIdsInListRecords.add(persistentId.substring(persistentId.lastIndexOf('/') + 1));
+        }
+
+        // And now the next and the final ListRecords page.
+        // This time around we should get an *empty* resumptionToken (indicating
+        // that there are no more results):
+
+        // OAI Test 6. Run ListRecords with the final resumptionToken
+
+        listRecordsResponse = UtilIT.getOaiListRecordsWithResumptionToken(resumptionToken);
+        assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode());
+
+        // Validate the service section of the OAI response:
+        responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords");
+
+        ret = responseXmlPath.getList("OAI-PMH.ListRecords.record.header.identifier");
+        assertNotNull(ret);
+
+        if (logger.isLoggable(Level.FINE)) {
+            logger.info("listRecordsResponse.prettyPrint: "+listRecordsResponse.prettyPrint());
+        }
+
+        // Validate the payload of the ListRecords response:
+        // 6a) There should be only 1 item listed:
+        assertEquals("Wrong number of items on the final ListRecords page",
+                1, ret.size());
+
+        // 6b) The response still contains a resumptionToken element, but it must be empty:
+        resumptionToken = responseXmlPath.getString("OAI-PMH.ListRecords.resumptionToken");
+        assertNotNull("No resumption token in the final ListRecords response", resumptionToken);
+        assertTrue("Non-empty resumption token in the final ListRecords response", "".equals(resumptionToken));
+
+        // 6c) The total number of items in the set (5) is still listed correctly:
+        assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@completeListSize"));
+
+        // 6d) ... and the offset (cursor) is at the right position (4):
+        assertEquals(4, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@cursor"));
+
+        // Record the last identifier listed on this final page:
+        persistentIdsInListRecords.add(ret.get(0).substring(ret.get(0).lastIndexOf('/') + 1));
+
+        // Finally, let's confirm that the expected 5 datasets have been listed
+        // as part of this Set:
+
+        allDatasetsListed = true;
+
+        allDatasetsListed = persistentIdsInListRecords.contains(singleSetDatasetIdentifier);
+        for (String persistentId : extraDatasetsIdentifiers) {
+            allDatasetsListed = allDatasetsListed && persistentIdsInListRecords.contains(persistentId);
+        }
+
+        assertTrue("Control datasets not properly listed in the paged ListRecords response",
+                allDatasetsListed);
+
+        // OK, it is safe to assume ListRecords works as it should in page mode
+        // as well.
+
+        // And finally, let's delete the set
+        String setPath = String.format("/api/harvest/server/oaisets/%s", setName);
+        Response deleteResponse = given()
+                .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
+                .delete(setPath);
+        logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode());
+        assertEquals("Failed to delete the control multi-record set", 200, deleteResponse.getStatusCode());
     }
 }
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
index ac767279bd4..e669a268010 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
@@ -2641,7 +2641,18 @@ static Response getOaiRecord(String datasetPersistentId, String metadataFormat)
     }
 
     static Response getOaiListIdentifiers(String setName, String metadataFormat) {
-        String apiPath = String.format("/oai?verb=ListIdentifiers&set=%s&metadataPrefix=%s", setName, metadataFormat);
+
+        String apiPath;
+        if (StringUtil.nonEmpty(setName)) {
+            apiPath = String.format("/oai?verb=ListIdentifiers&set=%s&metadataPrefix=%s", setName, metadataFormat);
+        } else {
+            apiPath = String.format("/oai?verb=ListIdentifiers&metadataPrefix=%s", metadataFormat);
+        }
+        return given().get(apiPath);
+    }
+
+    static Response getOaiListIdentifiersWithResumptionToken(String resumptionToken) {
+        String apiPath = String.format("/oai?verb=ListIdentifiers&resumptionToken=%s", resumptionToken);
         return given().get(apiPath);
     }
 
@@ -2649,6 +2660,11 @@ static Response getOaiListRecords(String setName, String metadataFormat) {
         String apiPath = String.format("/oai?verb=ListRecords&set=%s&metadataPrefix=%s", setName, metadataFormat);
         return given().get(apiPath);
     }
+
+    static Response getOaiListRecordsWithResumptionToken(String resumptionToken) {
+        String apiPath = String.format("/oai?verb=ListRecords&resumptionToken=%s", resumptionToken);
+        return given().get(apiPath);
+    }
 
 static Response changeAuthenticatedUserIdentifier(String oldIdentifier, String newIdentifier, String apiToken) {
     Response response;

From 18b058a088217f615744d79e3c2b8d8ebdbd41d2 Mon Sep 17 00:00:00 2001
From: Anthony Reyes
Date: Mon, 12 Dec 2022 13:17:24 -0800
Subject: [PATCH 453/608] Update dataverse.xhtml

Added contentTruncateForDataverse() to allow dataverse descriptions to have
read more/read less buttons.
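The mechanism, in brief: measure the rendered description and cap its height
when it exceeds a threshold. A minimal sketch of the pattern (assuming jQuery;
the 250px threshold and the #dataverseDesc selector are the ones used in the
diff below):

    // cap tall description containers; a button (appended in the full
    // version below) lifts the cap again on click
    $('#dataverseDesc').each(function () {
        if ($(this).outerHeight() > 250) {
            $(this).css({'max-height': '250px', 'overflow-y': 'hidden'});
        }
    });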
--- src/main/webapp/dataverse.xhtml | 44 ++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/dataverse.xhtml b/src/main/webapp/dataverse.xhtml index 572bcf40300..98dc7664753 100644 --- a/src/main/webapp/dataverse.xhtml +++ b/src/main/webapp/dataverse.xhtml @@ -592,7 +592,7 @@
- +
@@ -828,7 +828,49 @@ $(document).ready(function () { initCarousel(); popoverHTML('#{bundle.htmlAllowedTitle}','#{bundle.htmlAllowedTags}'); + contentTruncateForDataverse(); }); + function contentTruncateForDataverse(){ + // SELECTOR ID FROM PARAMETERS + $('#dataverseDesc').each(function () { + + // add responsive img class to limit width to that of container + $(this).find('img').attr('class', 'img-responsive'); + + // find container height + var containerHeight = $(this).outerHeight(); + + if (containerHeight > 250) { + // ADD A MAX-HEIGHT TO CONTAINER + $(this).css({'max-height':'250px','overflow-y':'hidden','position':'relative'}); + + // BTN LABEL TEXT, ARIA ATTR'S, FROM BUNDLE VIA PARAMETERS + var readMoreBtn = ''; + var moreBlock = '
' + readMoreBtn + '
'; + var readLessBtn = ''; + var lessBlock = '
' + readLessBtn + '
';
+
+                // add "Read full desc [+]" btn, background fade
+                $(this).append(moreBlock);
+
+                // show full description in summary block on "Read full desc [+]" btn click
+                $(document).on('click', 'button.desc-more-link', function() {
+                    $(this).tooltip('hide').parent('div').parent('div').css({'max-height':'none','overflow-y':'visible','position':'relative'});
+                    $(this).parent('div.more-block').replaceWith(lessBlock);
+                    $('.less-block button').tooltip();
+                });
+
+                // truncate description in summary block on "Collapse desc [-]" btn click
+                $(document).on('click', 'button.desc-less-link', function() {
+                    $(this).tooltip('hide').parent('div').parent('div').css({'max-height':'250px','overflow-y':'hidden','position':'relative'});
+                    $(this).parent('div.less-block').replaceWith(moreBlock);
+                    $('html, body').animate({scrollTop: $('#dataverseDesc').offset().top - 60}, 500);
+                    $('.more-block button').tooltip();
+                });
+            }
+        });
+    }
+
     function initCarousel() {
         var owl1 = $("#featuredDataversesList");
         owl1.owlCarousel({

From 395d605a8e156dd2ee295a8aa2a0892cad898617 Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Mon, 12 Dec 2022 17:04:44 -0500
Subject: [PATCH 454/608] An automated test of an actual harvest (#8843)

---
 .../iq/dataverse/api/HarvestingClients.java   |  31 +---
 .../iq/dataverse/api/HarvestingClientsIT.java | 169 ++++++++++++++++--
 .../iq/dataverse/api/HarvestingServerIT.java  |   8 +
 3 files changed, 164 insertions(+), 44 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java
index 42534514b68..b75cb687c62 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java
@@ -373,13 +373,13 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname,
         }
 
         if (authenticatedUser == null || !authenticatedUser.isSuperuser()) {
-            return error(Response.Status.FORBIDDEN, "Only the Dataverse Admin user can run harvesting jobs");
+            return error(Response.Status.FORBIDDEN, "Only admin users can run harvesting jobs");
         }
 
         HarvestingClient harvestingClient = harvestingClientService.findByNickname(clientNickname);
 
         if (harvestingClient == null) {
-            return error(Response.Status.NOT_FOUND, "No such dataverse: "+clientNickname);
+            return error(Response.Status.NOT_FOUND, "No such client: "+clientNickname);
         }
 
         DataverseRequest dataverseRequest = createDataverseRequest(authenticatedUser);
@@ -391,35 +391,8 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname,
         return this.accepted();
     }
 
-    // This GET shows the status of the harvesting run in progress for this
-    // client, if present:
-    // @GET
-    // @Path("{nickName}/run")
-    // TODO:
-
-    // This DELETE kills the harvesting run in progress for this client,
-    // if present:
-    // @DELETE
-    // @Path("{nickName}/run")
-    // TODO:
-
-
-
-
     /* Auxiliary, helper methods: */
 
-    /*
-    @Deprecated
-    public static JsonArrayBuilder harvestingConfigsAsJsonArray(List harvestingDataverses) {
-        JsonArrayBuilder hdArr = Json.createArrayBuilder();
-
-        for (Dataverse hd : harvestingDataverses) {
-            hdArr.add(harvestingConfigAsJson(hd.getHarvestingClientConfig()));
-        }
-        return hdArr;
-    }*/
-
     public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) {
         if (harvestingConfig == null) {
             return null;
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java
index 9eac3545e54..8fef360c68b 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java
@@ -1,34 +1,58 @@
 package edu.harvard.iq.dataverse.api;
 
 import java.util.logging.Logger;
+import java.util.logging.Level;
 
 import com.jayway.restassured.RestAssured;
 import static com.jayway.restassured.RestAssured.given;
 import org.junit.Test;
 import com.jayway.restassured.response.Response;
+import static javax.ws.rs.core.Response.Status.CREATED;
+import static javax.ws.rs.core.Response.Status.UNAUTHORIZED;
+import static javax.ws.rs.core.Response.Status.ACCEPTED;
+import static javax.ws.rs.core.Response.Status.OK;
 import static org.hamcrest.CoreMatchers.equalTo;
-import static junit.framework.Assert.assertEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
 import org.junit.BeforeClass;
 
 /**
- * extremely minimal (for now) API tests for creating OAI clients.
+ * This class tests Harvesting Client functionality.
+ * Note that these methods test BOTH the proprietary Dataverse REST API for
+ * creating and managing harvesting clients, AND the underlying OAI-PMH
+ * harvesting functionality itself. I.e., we will use the Dataverse
+ * /api/harvest/clients/ api to run an actual harvest of a control set and
+ * then validate the resulting harvested content.
 */
public class HarvestingClientsIT {

    private static final Logger logger = Logger.getLogger(HarvestingClientsIT.class.getCanonicalName());

    private static final String harvestClientsApi = "/api/harvest/clients/";
-    private static final String harvestCollection = "root";
+    private static final String rootCollection = "root";
    private static final String harvestUrl = "https://demo.dataverse.org/oai";
    private static final String archiveUrl = "https://demo.dataverse.org";
    private static final String harvestMetadataFormat = "oai_dc";
    private static final String archiveDescription = "RestAssured harvesting client test";
+    private static final String controlOaiSet = "controlTestSet";
+    private static final int datasetsInControlSet = 7;
+    private static String normalUserAPIKey;
+    private static String adminUserAPIKey;
+    private static String harvestCollectionAlias;
 
     @BeforeClass
     public static void setUpClass() {
         RestAssured.baseURI = UtilIT.getRestAssuredBaseUri();
+
+        // Create the users, an admin and a non-admin:
+        setupUsers();
+
+        // Create a collection that we will use to harvest remote content into:
+        setupCollection();
+
     }
 
-    private void setupUsers() {
+    private static void setupUsers() {
         Response cu0 = UtilIT.createRandomUser();
         normalUserAPIKey = UtilIT.getApiTokenFromResponse(cu0);
         Response cu1 = UtilIT.createRandomUser();
@@ -36,13 +60,22 @@ private void setupUsers() {
         Response u1a = UtilIT.makeSuperUser(un1);
         adminUserAPIKey = UtilIT.getApiTokenFromResponse(cu1);
     }
+
+    private static void setupCollection() {
+        Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey);
+        createDataverseResponse.prettyPrint();
+        assertEquals(CREATED.getStatusCode(), createDataverseResponse.getStatusCode());
+
+        harvestCollectionAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
 
-    private String normalUserAPIKey;
-    private String adminUserAPIKey;
+        // publish dataverse:
+        Response publishDataverse = UtilIT.publishDataverseViaNativeApi(harvestCollectionAlias, adminUserAPIKey);
+
assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode()); + } @Test public void testCreateEditDeleteClient() { - setupUsers(); + //setupUsers(); String nickName = UtilIT.getRandomString(6); @@ -52,7 +85,7 @@ public void testCreateEditDeleteClient() { + "\"harvestUrl\":\"%s\"," + "\"archiveUrl\":\"%s\"," + "\"metadataFormat\":\"%s\"}", - harvestCollection, harvestUrl, archiveUrl, harvestMetadataFormat); + rootCollection, harvestUrl, archiveUrl, harvestMetadataFormat); // Try to create a client as normal user, should fail: @@ -61,7 +94,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .body(clientJson) .post(clientApiPath); - assertEquals(401, rCreate.getStatusCode()); + assertEquals(UNAUTHORIZED.getStatusCode(), rCreate.getStatusCode()); // Try to create the same as admin user, should succeed: @@ -70,7 +103,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(clientJson) .post(clientApiPath); - assertEquals(201, rCreate.getStatusCode()); + assertEquals(CREATED.getStatusCode(), rCreate.getStatusCode()); // Try to update the client we have just created: @@ -80,7 +113,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(updateJson) .put(clientApiPath); - assertEquals(200, rUpdate.getStatusCode()); + assertEquals(OK.getStatusCode(), rUpdate.getStatusCode()); // Now let's retrieve the client we've just created and edited: @@ -89,7 +122,7 @@ public void testCreateEditDeleteClient() { logger.info("getClient.getStatusCode(): " + getClientResponse.getStatusCode()); logger.info("getClient printresponse: " + getClientResponse.prettyPrint()); - assertEquals(200, getClientResponse.getStatusCode()); + assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode()); // ... and validate the values: @@ -98,7 +131,7 @@ public void testCreateEditDeleteClient() { .body("data.type", equalTo("oai")) .body("data.nickName", equalTo(nickName)) .body("data.archiveDescription", equalTo(archiveDescription)) - .body("data.dataverseAlias", equalTo(harvestCollection)) + .body("data.dataverseAlias", equalTo(rootCollection)) .body("data.harvestUrl", equalTo(harvestUrl)) .body("data.archiveUrl", equalTo(archiveUrl)) .body("data.metadataFormat", equalTo(harvestMetadataFormat)); @@ -109,7 +142,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .delete(clientApiPath); logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode()); - assertEquals(401, rDelete.getStatusCode()); + assertEquals(UNAUTHORIZED.getStatusCode(), rDelete.getStatusCode()); // Try to delete as admin user should work: @@ -117,6 +150,112 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .delete(clientApiPath); logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode()); - assertEquals(200, rDelete.getStatusCode()); + assertEquals(OK.getStatusCode(), rDelete.getStatusCode()); + } + + @Test + public void testHarvestingClientRun() throws InterruptedException { + // This test will create a client and attempt to perform an actual + // harvest and validate the resulting harvested content. + + // Setup: create the client via the API + // since this API is tested somewhat extensively in the previous + // method, we don't need to pay too much attention to this method, aside + // from confirming the expected HTTP status code. 
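+        // (With the constants defined at the top of this class, the JSON payload
+        // assembled below will look roughly like this - the collection alias is
+        // randomly generated at setup time:
+        //     {"dataverseAlias":"<random alias>","type":"oai",
+        //      "harvestUrl":"https://demo.dataverse.org/oai",
+        //      "archiveUrl":"https://demo.dataverse.org",
+        //      "set":"controlTestSet","metadataFormat":"oai_dc"} )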
+
+        String nickName = UtilIT.getRandomString(6);
+
+        String clientApiPath = String.format(harvestClientsApi+"%s", nickName);
+        String clientJson = String.format("{\"dataverseAlias\":\"%s\","
+                + "\"type\":\"oai\","
+                + "\"harvestUrl\":\"%s\","
+                + "\"archiveUrl\":\"%s\","
+                + "\"set\":\"%s\","
+                + "\"metadataFormat\":\"%s\"}",
+                harvestCollectionAlias, harvestUrl, archiveUrl, controlOaiSet, harvestMetadataFormat);
+
+        Response createResponse = given()
+                .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
+                .body(clientJson)
+                .post(clientApiPath);
+        assertEquals(CREATED.getStatusCode(), createResponse.getStatusCode());
+
+        // API TEST 1. Run the harvest using the configuration (client) we have
+        // just created
+
+        String runHarvestApiPath = String.format(harvestClientsApi+"%s/run", nickName);
+
+        // TODO? - verify that a non-admin user cannot perform this operation (401)
+
+        Response runResponse = given()
+                .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
+                .post(runHarvestApiPath);
+        assertEquals(ACCEPTED.getStatusCode(), runResponse.getStatusCode());
+
+        // API TEST 2. As indicated by the ACCEPTED status code above, harvesting
+        // is an asynchronous operation that will be performed in the background.
+        // Verify that this "in progress" status is properly reported while it's
+        // running, and that it completes in some reasonable amount of time.
+
+        int i = 0;
+        int maxWait=20; // a very conservative interval; this harvest has no business taking this long
+        do {
+            // keep checking the status of the client with the GET api:
+            Response getClientResponse = given()
+                    .get(clientApiPath);
+
+            assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode());
+            assertEquals(AbstractApiBean.STATUS_OK, getClientResponse.body().jsonPath().getString("status"));
+
+            if (logger.isLoggable(Level.FINE)) {
+                logger.info("getClientResponse.prettyPrint: "
+                        + getClientResponse.prettyPrint());
+            }
+
+            String clientStatus = getClientResponse.body().jsonPath().getString("data.status");
+            assertNotNull(clientStatus);
+
+            if ("inProgress".equals(clientStatus)) {
+                // we'll sleep for another second
+                i++;
+            } else {
+                // Check the values in the response:
+                // a) Confirm that the harvest has completed:
+                assertEquals("Unexpected client status: "+clientStatus, "inActive", clientStatus);
+
+                // b) Confirm that it has actually succeeded:
+                assertEquals("Last harvest not reported as a success", "SUCCESS", getClientResponse.body().jsonPath().getString("data.lastResult"));
+                String harvestTimeStamp = getClientResponse.body().jsonPath().getString("data.lastHarvest");
+                assertNotNull(harvestTimeStamp);
+
+                // c) Confirm that the other timestamps match:
+                assertEquals(harvestTimeStamp, getClientResponse.body().jsonPath().getString("data.lastSuccessful"));
+                assertEquals(harvestTimeStamp, getClientResponse.body().jsonPath().getString("data.lastNonEmpty"));
+
+                // d) Confirm that the correct number of datasets have been harvested:
+                assertEquals(datasetsInControlSet, getClientResponse.body().jsonPath().getInt("data.lastDatasetsHarvested"));
+
+                // ok, it looks like the harvest has completed successfully.
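+                // (For reference, the client-status JSON validated above is
+                // expected to look roughly like this - the values shown are
+                // illustrative, not actual output:
+                //     {"status":"OK","data":{"status":"inActive","lastResult":"SUCCESS",
+                //      "lastHarvest":"<timestamp>","lastSuccessful":"<timestamp>",
+                //      "lastNonEmpty":"<timestamp>","lastDatasetsHarvested":7}} )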
+ break; + } + Thread.sleep(1000L); + } while (i Date: Mon, 12 Dec 2022 17:10:35 -0500 Subject: [PATCH 455/608] comments (#8843) --- .../iq/dataverse/api/HarvestingClientsIT.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java index 8fef360c68b..448faa20b0b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java @@ -75,7 +75,9 @@ private static void setupCollection() { @Test public void testCreateEditDeleteClient() { - //setupUsers(); + // This method focuses on testing the native Dataverse harvesting client + // API. + String nickName = UtilIT.getRandomString(6); @@ -158,7 +160,7 @@ public void testHarvestingClientRun() throws InterruptedException { // This test will create a client and attempt to perform an actual // harvest and validate the resulting harvested content. - // Setup: create the client via the API + // Setup: create the client via native API // since this API is tested somewhat extensively in the previous // method, we don't need to pay too much attention to this method, aside // from confirming the expected HTTP status code. @@ -246,8 +248,11 @@ public void testHarvestingClientRun() throws InterruptedException { // Fail if it hasn't completed in maxWait seconds assertTrue(i < maxWait); - // TODO: use the native Dataverses/Datasets apis to verify that the expected - // datasets have been harvested. + // TODO(?) use the native Dataverses/Datasets apis to verify that the expected + // datasets have been harvested. This may or may not be necessary, seeing + // how we have already confirmed the number of successfully harvested + // datasets from the control set; somewhat hard to imagine a practical + // situation where that would not be enough (?). // Cleanup: delete the client From 9dcbfa05de4316cc3c5560e5350a1f46ebf30d4a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 13 Dec 2022 17:57:03 +0100 Subject: [PATCH 456/608] revert(metadata): remove CodeMeta fields from Solr schema #7844 This reverts commit 8d5edf23a13631e878c413e55c320cb704a579b5. @IQSS decided we will not include fields from experimental blocks in the schema. 
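For context: the reverted commit had added plain Solr field declarations for
the CodeMeta block. The exact field names are stripped from the diff below,
but each removed entry was of roughly this shape (illustrative names, assuming
the schema.xml conventions used elsewhere in that file):

    <field name="codeVersion" type="text_en" multiValued="false" stored="true" indexed="true"/>
    <copyField source="codeVersion" dest="_text_" maxChars="3000"/>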
--- conf/solr/8.11.1/schema.xml | 48 +------------------------------------ 1 file changed, 1 insertion(+), 47 deletions(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index 2656abf0dc5..63312ab5d40 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -405,31 +405,9 @@ - - - - - - - - - - - - - - - - - - - - - - - + @@ -667,30 +645,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - + SAML2 SAML1 From d5a86439a44b5b4dfd561d8eac73e915f394612d Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 13 Jan 2023 09:34:22 -0500 Subject: [PATCH 533/608] #8724 show child ds in linking dv --- .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index e2f2b3adcfd..09cd8a72f0c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1497,6 +1497,7 @@ private List findAllLinkingDataverses(DvObject dvObject){ dataset = (Dataset) dvObject; linkingDataverses = dsLinkingService.findLinkingDataverses(dataset.getId()); ancestorList = dataset.getOwner().getOwners(); + ancestorList.add(dataset.getOwner()); //to show dataset in linking dv when parent dv is linked } if(dvObject.isInstanceofDataverse()){ dv = (Dataverse) dvObject; From 2efd8a4e12967916032e315eb69fdecbd51a738b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 13 Jan 2023 16:14:25 -0500 Subject: [PATCH 534/608] A proof-of-concept quick implementation of "custom headers in OAI calls", #9231 --- .../iq/dataverse/api/HarvestingClients.java | 42 +-- .../harvest/client/HarvestingClient.java | 100 +------ .../client/oai/CustomJdkHttpXoaiClient.java | 259 ++++++++++++++++++ .../harvest/client/oai/OaiHandler.java | 49 +++- .../iq/dataverse/util/json/JsonParser.java | 1 + .../iq/dataverse/util/json/JsonPrinter.java | 29 +- 6 files changed, 355 insertions(+), 125 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java index b75cb687c62..9aea3adab8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java @@ -15,6 +15,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import javax.json.JsonObjectBuilder; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import java.io.IOException; @@ -88,7 +89,7 @@ public Response harvestingClients(@QueryParam("key") String apiKey) throws IOExc } if (retrievedHarvestingClient != null) { - hcArr.add(harvestingConfigAsJson(retrievedHarvestingClient)); + hcArr.add(JsonPrinter.json(retrievedHarvestingClient)); } } @@ -136,7 +137,7 @@ public Response harvestingClient(@PathParam("nickName") String nickName, @QueryP } try { - return ok(harvestingConfigAsJson(retrievedHarvestingClient)); + return ok(JsonPrinter.json(retrievedHarvestingClient)); } catch (Exception ex) { logger.warning("Unknown exception caught while trying to format harvesting client config as json: 
"+ex.getMessage()); return error( Response.Status.BAD_REQUEST, @@ -216,7 +217,7 @@ public Response createHarvestingClient(String jsonBody, @PathParam("nickName") S DataverseRequest req = createDataverseRequest(findUserOrDie()); harvestingClient = execCommand(new CreateHarvestingClientCommand(req, harvestingClient)); - return created( "/harvest/clients/" + nickName, harvestingConfigAsJson(harvestingClient)); + return created( "/harvest/clients/" + nickName, JsonPrinter.json(harvestingClient)); } catch (JsonParseException ex) { return error( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); @@ -268,6 +269,8 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S } // Go through the supported editable fields and update the client accordingly: + // TODO: We may want to reevaluate whether we really want/need *all* + // of these fields to be editable. if (newHarvestingClient.getHarvestingUrl() != null) { harvestingClient.setHarvestingUrl(newHarvestingClient.getHarvestingUrl()); @@ -287,10 +290,13 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S if (newHarvestingClient.getHarvestStyle() != null) { harvestingClient.setHarvestStyle(newHarvestingClient.getHarvestStyle()); } + if (newHarvestingClient.getCustomHttpHeaders() != null) { + harvestingClient.setCustomHttpHeaders(newHarvestingClient.getCustomHttpHeaders()); + } // TODO: Make schedule configurable via this API too. harvestingClient = execCommand( new UpdateHarvestingClientCommand(req, harvestingClient)); - return ok( "/harvest/clients/" + nickName, harvestingConfigAsJson(harvestingClient)); + return ok( "/harvest/clients/" + nickName, JsonPrinter.json(harvestingClient)); // harvestingConfigAsJson(harvestingClient)); } catch (JsonParseException ex) { return error( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); @@ -390,32 +396,4 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname, } return this.accepted(); } - - /* Auxiliary, helper methods: */ - - public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) { - if (harvestingConfig == null) { - return null; - } - - - return jsonObjectBuilder().add("nickName", harvestingConfig.getName()). - add("dataverseAlias", harvestingConfig.getDataverse().getAlias()). - add("type", harvestingConfig.getHarvestType()). - add("style", harvestingConfig.getHarvestStyle()). - add("harvestUrl", harvestingConfig.getHarvestingUrl()). - add("archiveUrl", harvestingConfig.getArchiveUrl()). - add("archiveDescription",harvestingConfig.getArchiveDescription()). - add("metadataFormat", harvestingConfig.getMetadataPrefix()). - add("set", harvestingConfig.getHarvestingSet() == null ? "N/A" : harvestingConfig.getHarvestingSet()). - add("schedule", harvestingConfig.isScheduled() ? harvestingConfig.getScheduleDescription() : "none"). - add("status", harvestingConfig.isHarvestingNow() ? "inProgress" : "inActive"). - add("lastHarvest", harvestingConfig.getLastHarvestTime() == null ? "N/A" : harvestingConfig.getLastHarvestTime().toString()). - add("lastResult", harvestingConfig.getLastResult()). - add("lastSuccessful", harvestingConfig.getLastSuccessfulHarvestTime() == null ? "N/A" : harvestingConfig.getLastSuccessfulHarvestTime().toString()). - add("lastNonEmpty", harvestingConfig.getLastNonEmptyHarvestTime() == null ? "N/A" : harvestingConfig.getLastNonEmptyHarvestTime().toString()). 
- add("lastDatasetsHarvested", harvestingConfig.getLastHarvestedDatasetCount() == null ? "N/A" : harvestingConfig.getLastHarvestedDatasetCount().toString()). - add("lastDatasetsDeleted", harvestingConfig.getLastDeletedDatasetCount() == null ? "N/A" : harvestingConfig.getLastDeletedDatasetCount().toString()). - add("lastDatasetsFailed", harvestingConfig.getLastFailedDatasetCount() == null ? "N/A" : harvestingConfig.getLastFailedDatasetCount().toString()); - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index aeb010fad6d..d27ddc41b7f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -234,6 +234,16 @@ public void setMetadataPrefix(String metadataPrefix) { this.metadataPrefix = metadataPrefix; } + private String customHttpHeaders; + + public String getCustomHttpHeaders() { + return customHttpHeaders; + } + + public void setCustomHttpHeaders(String customHttpHeaders) { + this.customHttpHeaders = customHttpHeaders; + } + // TODO: do we need "orphanRemoval=true"? -- L.A. 4.4 // TODO: should it be @OrderBy("startTime")? -- L.A. 4.4 @OneToMany(mappedBy="harvestingClient", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) @@ -345,95 +355,7 @@ public Long getLastDeletedDatasetCount() { return lastNonEmptyHarvest.getDeletedDatasetCount(); } return null; - } - - /* move the fields below to the new HarvestingClientRun class: - private String harvestResult; - - public String getResult() { - return harvestResult; - } - - public void setResult(String harvestResult) { - this.harvestResult = harvestResult; - } - - // "Last Harvest Time" is the last time we *attempted* to harvest - // from this remote resource. - // It wasn't necessarily a successful attempt! - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastHarvestTime; - - public Date getLastHarvestTime() { - return lastHarvestTime; - } - - public void setLastHarvestTime(Date lastHarvestTime) { - this.lastHarvestTime = lastHarvestTime; - } - - // This is the last "successful harvest" - i.e., the last time we - // tried to harvest, and got a response from the remote server. - // We may not have necessarily harvested any useful content though; - // the result may have been a "no content" or "no changes since the last harvest" - // response. - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastSuccessfulHarvestTime; - - public Date getLastSuccessfulHarvestTime() { - return lastSuccessfulHarvestTime; - } - - public void setLastSuccessfulHarvestTime(Date lastSuccessfulHarvestTime) { - this.lastSuccessfulHarvestTime = lastSuccessfulHarvestTime; - } - - // Finally, this is the time stamp from the last "non-empty" harvest. - // I.e. 
the last time we ran a harvest that actually resulted in - // some Datasets created, updated or deleted: - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastNonEmptyHarvestTime; - - public Date getLastNonEmptyHarvestTime() { - return lastNonEmptyHarvestTime; - } - - public void setLastNonEmptyHarvestTime(Date lastNonEmptyHarvestTime) { - this.lastNonEmptyHarvestTime = lastNonEmptyHarvestTime; - } - - // And these are the Dataset counts from that last "non-empty" harvest: - private Long harvestedDatasetCount; - private Long failedDatasetCount; - private Long deletedDatasetCount; - - public Long getLastHarvestedDatasetCount() { - return harvestedDatasetCount; - } - - public void setHarvestedDatasetCount(Long harvestedDatasetCount) { - this.harvestedDatasetCount = harvestedDatasetCount; - } - - public Long getLastFailedDatasetCount() { - return failedDatasetCount; - } - - public void setFailedDatasetCount(Long failedDatasetCount) { - this.failedDatasetCount = failedDatasetCount; - } - - public Long getLastDeletedDatasetCount() { - return deletedDatasetCount; - } - - public void setDeletedDatasetCount(Long deletedDatasetCount) { - this.deletedDatasetCount = deletedDatasetCount; - } - */ + } private boolean scheduled; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java new file mode 100644 index 00000000000..25c3a048219 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java @@ -0,0 +1,259 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package edu.harvard.iq.dataverse.harvest.client.oai; + +import io.gdcc.xoai.serviceprovider.client.OAIClient; + +import io.gdcc.xoai.serviceprovider.exceptions.OAIRequestException; +import io.gdcc.xoai.serviceprovider.parameters.Parameters; +import java.io.IOException; +import java.io.InputStream; +import static java.net.HttpURLConnection.HTTP_OK; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.nio.charset.StandardCharsets; +import java.security.KeyManagementException; +import java.security.NoSuchAlgorithmException; +import java.security.cert.X509Certificate; +import java.time.Duration; +import java.util.List; +import java.util.ListIterator; +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.http.Header; + +/** + * Sane default OAI Client implementation using JDK HTTP Client. Can only be used via builder in + * calling code. + * (this is essentially a copy of the final class JdkHttpOaiClient provided by + * gdcc.xoai, with the custom http headers added. proof of concept! + */ +public final class CustomJdkHttpXoaiClient extends OAIClient { + + private static final Logger log = LoggerFactory.getLogger(OAIClient.class.getCanonicalName()); + + // As these vars will be feed via the builder and those provide defaults and null-checks, + // we may assume FOR INTERNAL USE these are not null. 
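+    // (The custom headers are parsed out of the harvesting client's
+    // "customHttpHeaders" string; judging by the test value in OaiHandler further
+    // down in this patch, the assumed format is one "name: value" pair per
+    // newline-separated segment, e.g. "x-api-key: xxx-yyy-zzz\ny-api-key: zzz-yyy-xxx".)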
+    private final String baseUrl;
+    private final String userAgent;
+    private final Duration requestTimeout;
+    private final HttpClient httpClient;
+    // Custom headers are optional though, ok to be null:
+    private final List<Header> customHeaders;
+
+    CustomJdkHttpXoaiClient(
+            String baseUrl, String userAgent, Duration requestTimeout, List<Header> customHeaders, HttpClient httpClient) {
+        this.baseUrl = baseUrl;
+        this.userAgent = userAgent;
+        this.requestTimeout = requestTimeout;
+        this.httpClient = httpClient;
+        this.customHeaders = customHeaders;
+    }
+
+    @Override
+    public InputStream execute(Parameters parameters) throws OAIRequestException {
+        try {
+            URI requestURI = URI.create(parameters.toUrl(this.baseUrl));
+
+            HttpRequest.Builder httpRequestBuilder = HttpRequest.newBuilder()
+                    .uri(requestURI)
+                    .GET()
+                    .header("User-Agent", this.userAgent)
+                    .timeout(requestTimeout);
+
+            // add custom headers, if present:
+            if (customHeaders != null) {
+                ListIterator<Header> iterator = customHeaders.listIterator();
+                while (iterator.hasNext()) {
+                    Header customHeader = iterator.next();
+                    httpRequestBuilder.header(customHeader.getName(), customHeader.getValue());
+                }
+            }
+
+            HttpRequest request = httpRequestBuilder.build();
+
+            HttpResponse<InputStream> response =
+                    this.httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream());
+
+            if (response.statusCode() == HTTP_OK) {
+                return response.body();
+            } else {
+                // copy body of the response to string and send as exception message
+                throw new OAIRequestException(
+                        "Query failed with status code "
+                                + response.statusCode()
+                                + ": "
+                                + new String(
+                                        response.body().readAllBytes(), StandardCharsets.UTF_8));
+            }
+        } catch (IllegalArgumentException | IOException | InterruptedException ex) {
+            // Hint by SonarCloud:
+            // https://sonarcloud.io/organizations/gdcc/rules?open=java%3AS2142&rule_key=java%3AS2142
+            Thread.currentThread().interrupt();
+            throw new OAIRequestException(ex);
+        }
+    }
+
+    /*@Override
+    JdkHttpBuilder newBuilder() {
+        return new CustomJdkHttpXoaiClient.JdkHttpBuilder();
+    }*/
+
+    /**
+     * Build an {@link OAIClient} using the JDK native HTTP client. You may use your own prepared
+     * {@link HttpClient.Builder} instead of the default one.
+     *
+     *
+
+    /**
+     * Build an {@link OAIClient} using the JDK native HTTP client. You may use your own prepared
+     * {@link HttpClient.Builder} instead of the default one.
+     *
+     * <p>Provides defaults for request timeouts (60s) and user agent. Remember to set the base
+     * OAI-PMH URL via {@link #withBaseUrl(URL)}. An exception will occur on first request
+     * otherwise.
+     */
+    public static final class JdkHttpBuilder implements OAIClient.Builder {
+        private String baseUrl = "Must be set via Builder.withBaseUrl()";
+        private String userAgent = "XOAI Service Provider v5";
+        private Duration requestTimeout = Duration.ofSeconds(60);
+        private List<Header> customHeaders = null;
+        private final HttpClient.Builder httpClientBuilder;
+
+        JdkHttpBuilder() {
+            this.httpClientBuilder = HttpClient.newBuilder();
+        }
+
+        /**
+         * While the default constructor can be accessed via {@link OAIClient#newBuilder()}, if
+         * someone provides a {@link HttpClient.Builder} (which might already contain
+         * configuration), happily work with it.
+         *
+         * @param httpClientBuilder Any (preconfigured) Java 11+ HTTP client builder
+         */
+        public JdkHttpBuilder(HttpClient.Builder httpClientBuilder) {
+            this.httpClientBuilder = httpClientBuilder;
+        }
+
+        @Override
+        public JdkHttpBuilder withBaseUrl(URL baseUrl) {
+            return this.withBaseUrl(baseUrl.toString());
+        }
+
+        @Override
+        public JdkHttpBuilder withBaseUrl(String baseUrl) {
+            try {
+                new URL(baseUrl).toURI();
+                if (!baseUrl.startsWith("http")) {
+                    throw new IllegalArgumentException("OAI-PMH supports HTTP/S only");
+                }
+                this.baseUrl = baseUrl;
+                return this;
+            } catch (MalformedURLException | URISyntaxException e) {
+                throw new IllegalArgumentException(e);
+            }
+        }
+
+        @Override
+        public JdkHttpBuilder withConnectTimeout(Duration timeout) {
+            // validation is done by client builder!
+            httpClientBuilder.connectTimeout(timeout);
+            return this;
+        }
+
+        @Override
+        public JdkHttpBuilder withRequestTimeout(Duration timeout) {
+            if (timeout == null || timeout.isNegative()) {
+                throw new IllegalArgumentException("Timeout must not be null or negative value");
+            }
+            this.requestTimeout = timeout;
+            return this;
+        }
+
+        @Override
+        public JdkHttpBuilder withUserAgent(String userAgent) {
+            if (userAgent == null || userAgent.isBlank()) {
+                throw new IllegalArgumentException("User agent must not be null or empty/blank");
+            }
+            this.userAgent = userAgent;
+            return this;
+        }
+
+        @Override
+        public JdkHttpBuilder withFollowRedirects() {
+            this.httpClientBuilder.followRedirects(HttpClient.Redirect.NORMAL);
+            return this;
+        }
+
+        @Override
+        public JdkHttpBuilder withInsecureSSL() {
+            // create insecure context (switch off certificate checks)
+            httpClientBuilder.sslContext(insecureContext());
+
+            // warn if the hostname verification is still active
+            // (users must do this themselves - it's a global setting and might pose a security
+            // risk)
+            if (!Boolean.getBoolean("jdk.internal.httpclient.disableHostnameVerification")) {
+                log.warn(
+                        "You must disable JDK HTTP Client Host Name Verification globally via"
+                                + " system property"
+                                + " -Djdk.internal.httpclient.disableHostnameVerification=true for"
+                                + " XOAI Client connections to insecure SSL servers. Don't do this in"
+                                + " a production setup!");
+            }
+            return this;
+        }
+
+        public JdkHttpBuilder withCustomHeaders(List<Header>
customHeaders) { + // This can be null, as these headers are optional + this.customHeaders = customHeaders; + return this; + } + + @Override + public CustomJdkHttpXoaiClient build() { + return new CustomJdkHttpXoaiClient( + this.baseUrl, this.userAgent, this.requestTimeout, this.customHeaders, httpClientBuilder.build()); + } + + private static SSLContext insecureContext() { + TrustManager[] noopTrustManager = + new TrustManager[] { + new X509TrustManager() { + // This is insecure by design, we warn users and they need to do sth. to + // use it. + // Safely ignore the Sonarcloud message. + @SuppressWarnings("java:S4830") + public void checkClientTrusted(X509Certificate[] xcs, String string) { + // we want to accept every certificate - intentionally left blank + } + // This is insecure by design, we warn users and they need to do sth. to + // use it. + // Safely ignore the Sonarcloud message. + @SuppressWarnings("java:S4830") + public void checkServerTrusted(X509Certificate[] xcs, String string) { + // we want to accept every certificate - intentionally left blank + } + + public X509Certificate[] getAcceptedIssuers() { + return new X509Certificate[0]; + } + } + }; + try { + SSLContext sc = SSLContext.getInstance("TLSv1.2"); + sc.init(null, noopTrustManager, null); + return sc; + } catch (KeyManagementException | NoSuchAlgorithmException ex) { + log.error("Could not build insecure SSL context. Might cause NPE.", ex); + return null; + } + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java index c0a039e2d2b..ae297416ff9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java @@ -5,7 +5,6 @@ import io.gdcc.xoai.model.oaipmh.results.MetadataFormat; import io.gdcc.xoai.model.oaipmh.results.Set; import io.gdcc.xoai.serviceprovider.ServiceProvider; -import io.gdcc.xoai.serviceprovider.client.JdkHttpOaiClient; import io.gdcc.xoai.serviceprovider.exceptions.BadArgumentException; import io.gdcc.xoai.serviceprovider.exceptions.InvalidOAIResponse; import io.gdcc.xoai.serviceprovider.exceptions.NoSetHierarchyException; @@ -26,12 +25,15 @@ import java.util.Date; import java.util.Iterator; import java.util.List; +import java.util.logging.Logger; +import org.apache.http.message.BasicHeader; /** * * @author Leonid Andreev */ public class OaiHandler implements Serializable { + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler"); public OaiHandler() { @@ -65,6 +67,9 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException this.fromDate = harvestingClient.getLastNonEmptyHarvestTime(); + this.customHeaders = makeCustomHeaders(harvestingClient.getCustomHttpHeaders()); + //test: this.customHeaders = makeCustomHeaders("x-api-key: xxx-yyy-zzz\\ny-api-key: zzz-yyy-xxx"); + this.harvestingClient = harvestingClient; } @@ -74,6 +79,7 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException private String setName; private Date fromDate; private Boolean setListTruncated = false; + private List customHeaders = null; private ServiceProvider serviceProvider; @@ -119,6 +125,14 @@ public boolean isSetListTruncated() { return setListTruncated; } + public List getCustomHeaders() { + return this.customHeaders; + } + + public void setCustomHeaders(List customHeaders) { + this.customHeaders = 
customHeaders; + } + public ServiceProvider getServiceProvider() throws OaiHandlerException { if (serviceProvider == null) { if (baseOaiUrl == null) { @@ -128,8 +142,17 @@ public ServiceProvider getServiceProvider() throws OaiHandlerException { context.withBaseUrl(baseOaiUrl); context.withGranularity(Granularity.Second); - // builds the client with the default parameters and the JDK http client: - context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(baseOaiUrl).build()); + // builds the client based on the default client provided in xoai, + // with the same default parameters and the JDK http client, with + // just the (optional) custom headers added: + // (this is proof-of-concept implementation; there gotta be a prettier way to do this) + //context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(baseOaiUrl).build()); + if (getCustomHeaders() != null) { + for (org.apache.http.Header customHeader : getCustomHeaders()) { + logger.info("will add custom header; name: "+customHeader.getName()+", value: "+customHeader.getValue()); + } + } + context.withOAIClient((new CustomJdkHttpXoaiClient.JdkHttpBuilder()).withBaseUrl(getBaseOaiUrl()).withCustomHeaders(getCustomHeaders()).build()); serviceProvider = new ServiceProvider(context); } @@ -293,4 +316,24 @@ public void runIdentify() { // (we will need it, both for validating the remote server, // and to learn about its extended capabilities) } + + private List makeCustomHeaders(String headersString) { + if (headersString != null) { + List ret = new ArrayList<>(); + String[] parts = headersString.split("\\\\n"); + + for (int i = 0; i < parts.length; i++) { + if (parts[i].indexOf(':') > 0) { + String headerName = parts[i].substring(0, parts[i].indexOf(':')); + String headerValue = parts[i].substring(parts[i].indexOf(':')+1).strip(); + ret.add(new BasicHeader(headerName, headerValue)); + } + // simply skipping it if malformed; or we could throw an exception - ? 
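+                // e.g. a stored setting of "x-api-key: xxx-yyy-zzz\ny-api-key: zzz-yyy-xxx"
+                // (with a literal backslash and "n" as the separator, as documented) produces
+                // two BasicHeader entries here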
+ } + if (!ret.isEmpty()) { + return ret; + } + } + return null; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 905479c4e0d..22e2c6c8d78 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -908,6 +908,7 @@ public String parseHarvestingClient(JsonObject obj, HarvestingClient harvestingC harvestingClient.setArchiveDescription(obj.getString("archiveDescription", null)); harvestingClient.setMetadataPrefix(obj.getString("metadataFormat",null)); harvestingClient.setHarvestingSet(obj.getString("set",null)); + harvestingClient.setCustomHttpHeaders(obj.getString("customHeaders", null)); return dataverseAlias; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index dc547f2e52c..1ab596569a4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -37,6 +37,7 @@ import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.globus.FileDetailsHolder; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.DatasetFieldWalker; @@ -666,6 +667,32 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { ; } + public static JsonObjectBuilder json(HarvestingClient harvestingClient) { + if (harvestingClient == null) { + return null; + } + + return jsonObjectBuilder().add("nickName", harvestingClient.getName()). + add("dataverseAlias", harvestingClient.getDataverse().getAlias()). + add("type", harvestingClient.getHarvestType()). + add("style", harvestingClient.getHarvestStyle()). + add("harvestUrl", harvestingClient.getHarvestingUrl()). + add("archiveUrl", harvestingClient.getArchiveUrl()). + add("archiveDescription", harvestingClient.getArchiveDescription()). + add("metadataFormat", harvestingClient.getMetadataPrefix()). + add("set", harvestingClient.getHarvestingSet()). + add("schedule", harvestingClient.isScheduled() ? harvestingClient.getScheduleDescription() : "none"). + add("status", harvestingClient.isHarvestingNow() ? "inProgress" : "inActive"). + add("customHeaders", harvestingClient.getCustomHttpHeaders()). + add("lastHarvest", harvestingClient.getLastHarvestTime() == null ? null : harvestingClient.getLastHarvestTime().toString()). + add("lastResult", harvestingClient.getLastResult()). + add("lastSuccessful", harvestingClient.getLastSuccessfulHarvestTime() == null ? null : harvestingClient.getLastSuccessfulHarvestTime().toString()). + add("lastNonEmpty", harvestingClient.getLastNonEmptyHarvestTime() == null ? null : harvestingClient.getLastNonEmptyHarvestTime().toString()). + add("lastDatasetsHarvested", harvestingClient.getLastHarvestedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastHarvestedDatasetCount().toString()). + add("lastDatasetsDeleted", harvestingClient.getLastDeletedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastDeletedDatasetCount().toString()). + add("lastDatasetsFailed", harvestingClient.getLastFailedDatasetCount()); // == null ? 
"N/A" : harvestingClient.getLastFailedDatasetCount().toString()); + } + public static String format(Date d) { return (d == null) ? null : Util.getDateTimeFormat().format(d); } @@ -702,7 +729,7 @@ public static JsonArrayBuilder getTabularFileTags(DataFile df) { } return tabularTags; } - + private static class DatasetFieldsToJson implements DatasetFieldWalker.Listener { Deque objectStack = new LinkedList<>(); From 019fb749b11abdba75e3d058c9c5d38b07e50bae Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 17 Jan 2023 13:53:40 -0500 Subject: [PATCH 535/608] Added the custom header configuration to the harvesting clients GUI (#9231). --- .../iq/dataverse/HarvestingClientsPage.java | 46 +++++++++++++++++-- src/main/java/propertyFiles/Bundle.properties | 4 ++ src/main/webapp/harvestclients.xhtml | 17 +++++++ 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java index bc83c15dcd7..4430a7be73a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java @@ -79,7 +79,7 @@ public class HarvestingClientsPage implements java.io.Serializable { private Dataverse dataverse; private Long dataverseId = null; private HarvestingClient selectedClient; - private boolean setListTruncated = false; + private boolean setListTruncated = false; //private static final String solrDocIdentifierDataset = "dataset_"; @@ -245,6 +245,7 @@ public void editClient(HarvestingClient harvestingClient) { this.newNickname = harvestingClient.getName(); this.newHarvestingUrl = harvestingClient.getHarvestingUrl(); + this.customHeader = harvestingClient.getCustomHttpHeaders(); this.initialSettingsValidated = false; // TODO: do we want to try and contact the server, again, to make @@ -340,6 +341,7 @@ public void createClient(ActionEvent ae) { getSelectedDestinationDataverse().getHarvestingClientConfigs().add(newHarvestingClient); newHarvestingClient.setHarvestingUrl(newHarvestingUrl); + newHarvestingClient.setCustomHttpHeaders(customHeader); if (!StringUtils.isEmpty(newOaiSet)) { newHarvestingClient.setHarvestingSet(newOaiSet); } @@ -426,6 +428,7 @@ public void saveClient(ActionEvent ae) { // nickname is not editable for existing clients: //harvestingClient.setName(newNickname); harvestingClient.setHarvestingUrl(newHarvestingUrl); + harvestingClient.setCustomHttpHeaders(customHeader); harvestingClient.setHarvestingSet(newOaiSet); harvestingClient.setMetadataPrefix(newMetadataFormat); harvestingClient.setHarvestStyle(newHarvestingStyle); @@ -635,6 +638,23 @@ public boolean validateServerUrlOAI() { return false; } + public boolean validateCustomHeader() { + if (!StringUtils.isEmpty(getCustomHeader())) { + // TODO: put this method somewhere else as a static utility + + // check that it's looking like "{header-name}: {header value}" at least + if (!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getCustomHeader())) { + FacesContext.getCurrentInstance().addMessage(getNewClientCustomHeaderInputField().getClientId(), + new FacesMessage(FacesMessage.SEVERITY_ERROR, "", BundleUtil.getStringFromBundle("harvestclients.newClientDialog.customHeader.invalid"))); + + return false; + } + } + + // this setting is optional + return true; + } + public void validateInitialSettings() { if (isHarvestTypeOAI()) { boolean nicknameValidated = true; @@ -644,9 +664,10 @@ public void validateInitialSettings() { 
destinationDataverseValidated = validateSelectedDataverse(); } boolean urlValidated = validateServerUrlOAI(); + boolean customHeaderValidated = validateCustomHeader(); - if (nicknameValidated && destinationDataverseValidated && urlValidated) { - // In Create mode we want to run all 3 validation tests; this is why + if (nicknameValidated && destinationDataverseValidated && urlValidated && customHeaderValidated) { + // In Create mode we want to run all 4 validation tests; this is why // we are not doing "if ((validateNickname() && validateServerUrlOAI())" // in the line above. -- L.A. 4.4 May 2016. @@ -688,6 +709,7 @@ public void backToStepThree() { UIInput newClientNicknameInputField; UIInput newClientUrlInputField; + UIInput newClientCustomHeaderInputField; UIInput hiddenInputField; /*UISelectOne*/ UIInput metadataFormatMenu; UIInput remoteArchiveStyleMenu; @@ -695,6 +717,7 @@ public void backToStepThree() { private String newNickname = ""; private String newHarvestingUrl = ""; + private String customHeader = null; private boolean initialSettingsValidated = false; private String newOaiSet = ""; private String newMetadataFormat = ""; @@ -718,6 +741,7 @@ public void initNewClient(ActionEvent ae) { //this.selectedClient = new HarvestingClient(); this.newNickname = ""; this.newHarvestingUrl = ""; + this.customHeader = null; this.initialSettingsValidated = false; this.newOaiSet = ""; this.newMetadataFormat = ""; @@ -762,6 +786,14 @@ public void setNewHarvestingUrl(String newHarvestingUrl) { this.newHarvestingUrl = newHarvestingUrl; } + public String getCustomHeader() { + return customHeader; + } + + public void setCustomHeader(String customHeader) { + this.customHeader = customHeader; + } + public int getHarvestTypeRadio() { return this.harvestTypeRadio; } @@ -871,6 +903,14 @@ public void setNewClientUrlInputField(UIInput newClientInputField) { this.newClientUrlInputField = newClientInputField; } + public UIInput getNewClientCustomHeaderInputField() { + return newClientCustomHeaderInputField; + } + + public void setNewClientCustomHeaderInputField(UIInput newClientInputField) { + this.newClientCustomHeaderInputField = newClientInputField; + } + public UIInput getHiddenInputField() { return hiddenInputField; } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 62531d32bb2..e2007338e08 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -538,6 +538,10 @@ harvestclients.newClientDialog.nickname.helptext=Consists of letters, digits, un harvestclients.newClientDialog.nickname.required=Client nickname cannot be empty! harvestclients.newClientDialog.nickname.invalid=Client nickname can contain only letters, digits, underscores (_) and dashes (-); and must be at most 30 characters. harvestclients.newClientDialog.nickname.alreadyused=This nickname is already used. 
+harvestclients.newClientDialog.customHeader=Custom HTTP Header +harvestclients.newClientDialog.customHeader.helptext=(Optional) Custom HTTP header to add to OAI requests +harvestclients.newClientDialog.customHeader.watermark=Enter the header as in header-name: header-value +harvestclients.newClientDialog.customHeader.invalid=Client header name can only contain letters, digits, underscores (_) and dashes (-); the entire header string must be in the form of "header-name: header-value" harvestclients.newClientDialog.type=Server Protocol harvestclients.newClientDialog.type.helptext=Only the OAI server protocol is currently supported. harvestclients.newClientDialog.type.OAI=OAI diff --git a/src/main/webapp/harvestclients.xhtml b/src/main/webapp/harvestclients.xhtml index 5c7b3482ed3..a5f271e8e75 100644 --- a/src/main/webapp/harvestclients.xhtml +++ b/src/main/webapp/harvestclients.xhtml @@ -277,6 +277,23 @@
+                    [XHTML for the new "Custom HTTP Header" form group — a label, a text input
+                    bound to the page bean's custom header property via the
+                    newClientCustomHeaderInputField component binding, a watermark, and the help
+                    text #{bundle['harvestclients.newClientDialog.customHeader.helptext']} —
+                    markup not recoverable]
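
To illustrate the Step 1 validation this patch adds: validateCustomHeader() only requires that
the configured value look like a header name, a colon, and a value. A minimal sketch (the header
values are made up):

    // The same pattern used in HarvestingClientsPage.validateCustomHeader():
    Pattern headerPattern = Pattern.compile("^[a-zA-Z0-9\\_\\-]+:.*");
    headerPattern.matcher("x-api-key: xxx-yyy-zzz").matches(); // true  - accepted
    headerPattern.matcher("no colon here").matches();          // false - rejected with the
                                                                // customHeader.invalid message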
From 725348d78f2d749242dec78cdd071ef3428b6b69 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 17 Jan 2023 14:51:50 -0500 Subject: [PATCH 536/608] minor doc tweaks #7980 --- doc/release-notes/7980-enhanced-dsd.md | 4 +-- .../source/installation/config.rst | 32 +++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/doc/release-notes/7980-enhanced-dsd.md b/doc/release-notes/7980-enhanced-dsd.md index 6a86a2c4b37..d69f201e565 100644 --- a/doc/release-notes/7980-enhanced-dsd.md +++ b/doc/release-notes/7980-enhanced-dsd.md @@ -1,4 +1,4 @@ -### Default Values for Database Connections fixed +### Default Values for Database Connections Fixed Introduced in Dataverse release 5.3 a regression might have hit you: the announced default values for the database connection never actually worked. @@ -7,4 +7,4 @@ With the update to Payara 5.2022.3 it was possible to introduce working defaults. The documentation has been changed accordingly. Together with this change, you can now enable advanced connection pool -configurations useful for debugging and monitoring. See the docs for details. \ No newline at end of file +configurations useful for debugging and monitoring. Of particular interest may be `sslmode=require`. See the docs for details. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index bfd6c511a79..15924205026 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -263,20 +263,20 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. - +.. _database-persistence: Database Persistence -------------------- -The Dataverse software uses a PostgreSQL server and a Solr Search Index to store objects users create. -You can configure basic and advanced settings of the PostgreSQL database connection with the help of +The Dataverse software uses a PostgreSQL database to store objects users create. +You can configure basic and advanced settings for the PostgreSQL database connection with the help of MicroProfile Config API. Basic Database Settings +++++++++++++++++++++++ -1. Any of these settings can be set via system properties (see :ref:`jvm-options`), environment variables or other - MicroProfile Config mechanisms supported by the appserver. +1. 
Any of these settings can be set via system properties (see :ref:`jvm-options` starting at :ref:`dataverse.db.name`), environment variables or other + MicroProfile Config mechanisms supported by the app server. `See Payara docs for supported sources `_. 2. Remember to protect your secrets. For passwords, use an environment variable (bare minimum), a password alias named the same as the key (OK) or use the "dir config source" of Payara (best). @@ -289,7 +289,7 @@ Basic Database Settings asadmin create-password-alias --passwordfile /tmp/p.txt dataverse.db.password rm /tmp/p.txt -3. Environment variables follow the key, replacing any dot, colon, dash, etc into an underscore "_" and all uppercase +3. Environment variables follow the key, replacing any dot, colon, dash, etc. into an underscore "_" and all uppercase letters. Example: ``dataverse.db.host`` -> ``DATAVERSE_DB_HOST`` .. list-table:: @@ -320,7 +320,7 @@ Basic Database Settings - | ``dataverse`` | (installer sets to ``dvndb``) * - dataverse.db.parameters - - Connection parameters, see `Postgres JDBC docs `_ + - Connection parameters, such as ``sslmode=require``. See `Postgres JDBC docs `_ Note: you don't need to provide the initial "?". - *Empty string* @@ -347,17 +347,17 @@ Connection Validation - Description - Default * - dataverse.db.is-connection-validation-required - - ``true``: Validate connections, allow server to reconnect in case of failure + - ``true``: Validate connections, allow server to reconnect in case of failure. - false * - dataverse.db.connection-validation-method - | The method of connection validation: - | ``table|autocommit|meta-data|custom-validation`` + | ``table|autocommit|meta-data|custom-validation``. - *Empty string* * - dataverse.db.validation-table-name - - The name of the table used for validation if the validation method is set to ``table`` + - The name of the table used for validation if the validation method is set to ``table``. - *Empty string* * - dataverse.db.validation-classname - - The name of the custom class used for validation if the ``validation-method`` is set to ``custom-validation`` + - The name of the custom class used for validation if the ``validation-method`` is set to ``custom-validation``. - *Empty string* * - dataverse.db.validate-atmost-once-period-in-seconds - Specifies the time interval in seconds between successive requests to validate a connection at most once. @@ -381,10 +381,10 @@ Connection & Statement Leaks - If enabled, leaked connection will be reclaimed by the pool after connection leak timeout occurs. - ``false`` * - dataverse.db.statement-leak-timeout-in-seconds - - Specifiy timeout when statements should be considered to be "leaked" + - Specifiy timeout when statements should be considered to be "leaked". - ``0`` (disabled) * - dataverse.db.statement-leak-reclaim - - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs + - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs. - ``false`` Logging & Slow Performance @@ -405,7 +405,7 @@ Logging & Slow Performance - SQL queries that exceed this time in seconds will be logged. - ``-1`` (disabled) * - dataverse.db.log-jdbc-calls - - When set to true, all JDBC calls will be logged allowing tracing of all JDBC interactions including SQL + - When set to true, all JDBC calls will be logged allowing tracing of all JDBC interactions including SQL. 
- ``false`` @@ -1691,6 +1691,8 @@ dataverse.auth.password-reset-timeout-in-minutes Users have 60 minutes to change their passwords by default. You can adjust this value here. +.. _dataverse.db.name: + dataverse.db.name +++++++++++++++++ @@ -1700,6 +1702,8 @@ Defaults to ``dataverse`` (but the installer sets it to ``dvndb``). Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_NAME``. +See also :ref:`database-persistence`. + dataverse.db.user +++++++++++++++++ From d191cba41ca3665a3fd55e1f0c944a998b56a515 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 18 Jan 2023 15:43:59 -0500 Subject: [PATCH 537/608] #8724 fix dv linking to self if parent is linked --- .../edu/harvard/iq/dataverse/search/IndexServiceBean.java | 5 +++++ src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java | 8 ++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 09cd8a72f0c..f9a3cbf5633 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1662,6 +1662,11 @@ private List retrieveDVOPaths(DvObject dvo) { logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex); } List dataversePaths = getDataversePathsFromSegments(dataverseSegments); + if (dataversePaths.size() > 0) { + // removing the dataverse's own id from the paths + // fixes bug where if my parent dv was linked my dv was shown as linked to myself + dataversePaths.remove(dataversePaths.size() - 1); + } /* add linking paths */ diff --git a/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java b/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java index 9ac2d2cb7e5..45efda9e230 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java @@ -174,10 +174,14 @@ public void testDeepLinks() { * Remove this early return when you are ready to work on * https://github.com/IQSS/dataverse/issues/7430 about strange linking * behavior. - */ - if (true) { + * + * + * + * if (true) { return; } + */ + Response createLevel2a = UtilIT.createSubDataverse(UtilIT.getRandomDvAlias() + "-level2a", null, apiToken, level1a); createLevel2a.prettyPrint(); From 230298902fbb7296c9623a355e66e72302f83174 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 19 Jan 2023 10:00:33 -0500 Subject: [PATCH 538/608] rename sql scripts #9153 "Use a version like '4.11.0.1' in the example above where the previously released version was 4.11" -- dev guide That is, these scripts should have been 5.12.1.whatever since the last release was 5.12.1. Fixing. (They were 5.13.whatever.) 
--- ...-sorting_licenses.sql => V5.12.1.1__8671-sorting_licenses.sql} | 0 ...ls-for-tools.sql => V5.12.1.2__7715-signed-urls-for-tools.sql} | 0 ...imates.sql => V5.12.1.3__8840-improve-guestbook-estimates.sql} | 0 ...-extract-metadata.sql => V5.12.1.4__9153-extract-metadata.sql} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.13.0.1__8671-sorting_licenses.sql => V5.12.1.1__8671-sorting_licenses.sql} (100%) rename src/main/resources/db/migration/{V5.13.0.2__7715-signed-urls-for-tools.sql => V5.12.1.2__7715-signed-urls-for-tools.sql} (100%) rename src/main/resources/db/migration/{V5.13.0.3__8840-improve-guestbook-estimates.sql => V5.12.1.3__8840-improve-guestbook-estimates.sql} (100%) rename src/main/resources/db/migration/{V5.13.0.3__9153-extract-metadata.sql => V5.12.1.4__9153-extract-metadata.sql} (100%) diff --git a/src/main/resources/db/migration/V5.13.0.1__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.1__8671-sorting_licenses.sql rename to src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql diff --git a/src/main/resources/db/migration/V5.13.0.2__7715-signed-urls-for-tools.sql b/src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.2__7715-signed-urls-for-tools.sql rename to src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql diff --git a/src/main/resources/db/migration/V5.13.0.3__8840-improve-guestbook-estimates.sql b/src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.3__8840-improve-guestbook-estimates.sql rename to src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql diff --git a/src/main/resources/db/migration/V5.13.0.3__9153-extract-metadata.sql b/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.3__9153-extract-metadata.sql rename to src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql From b4bb357062222b72ebacd48e45c721adc06ee82c Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 19 Jan 2023 10:13:42 -0500 Subject: [PATCH 539/608] #8724 add release note for re-index collections --- .../8724-display-child-datasets-of-linked-dv.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 doc/release-notes/8724-display-child-datasets-of-linked-dv.md diff --git a/doc/release-notes/8724-display-child-datasets-of-linked-dv.md b/doc/release-notes/8724-display-child-datasets-of-linked-dv.md new file mode 100644 index 00000000000..5b1b9c8ae20 --- /dev/null +++ b/doc/release-notes/8724-display-child-datasets-of-linked-dv.md @@ -0,0 +1,14 @@ +Datasets that are part of linked dataverse collections will now be displayed in +their linking dataverse collections. In order to fix the display of collections +that have already been linked you must re-index the linked collections. 
This +query will provide a list of commands to re-index the effected collections: + +select 'curl http://localhost:8080/api/admin/index/dataverses/' +|| tmp.dvid from (select distinct dataverse_id as dvid +from dataverselinkingdataverse) as tmp + +The result of the query will be a list of re-index commands such as: + +curl http://localhost:8080/api/admin/index/dataverses/633 + +where '633' is the id of the linked collection. From f4e1dc9a4730da1207d3993d0f9b33ddf7635c38 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 19 Jan 2023 14:10:33 -0500 Subject: [PATCH 540/608] #8724 remove comments --- .../java/edu/harvard/iq/dataverse/api/LinkIT.java | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java b/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java index 45efda9e230..76e9b7d6bc8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java @@ -170,19 +170,6 @@ public void testDeepLinks() { .body("data.total_count", equalTo(1)) .body("data.items[0].name", equalTo(level1a)); - /** - * Remove this early return when you are ready to work on - * https://github.com/IQSS/dataverse/issues/7430 about strange linking - * behavior. - * - * - * - * if (true) { - return; - } - */ - - Response createLevel2a = UtilIT.createSubDataverse(UtilIT.getRandomDvAlias() + "-level2a", null, apiToken, level1a); createLevel2a.prettyPrint(); String level2a = UtilIT.getAliasFromResponse(createLevel2a); From d328371399fe06e31bf6a2a3007e0f3785a3fb4f Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 20 Jan 2023 15:38:01 -0500 Subject: [PATCH 541/608] #8339 add export API Token to docs --- doc/sphinx-guides/source/api/native-api.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 7a99795c335..0a8de08c56d 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2061,7 +2061,7 @@ Files ----- Get JSON Representation of a File -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. note:: Files can be accessed using persistent identifiers. This is done by passing the constant ``:persistentId`` where the numeric id of the file is expected, and then passing the actual persistent id as a query parameter with the name ``persistentId``. @@ -2071,6 +2071,7 @@ Example: Getting the file whose DOI is *10.5072/FK2/J8SJZB*: export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER @@ -2078,7 +2079,7 @@ The fully expanded example above (without environment variables) looks like this .. 
code-block:: bash - curl -H "X-Dataverse-key:$API_TOKEN" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB You may get its draft version if you pass an api token with view draft permissions: @@ -2086,6 +2087,7 @@ You may get its draft version if you pass an api token with view draft permissio export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx curl -H "X-Dataverse-key:$API_TOKEN" http://$SERVER/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER @@ -2093,7 +2095,7 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash - curl -H "X-Dataverse-key:$API_TOKEN" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB |CORS| Show the file whose id is passed: From aab0f2ab4ef87fdd46f70e0c5de1870ccccd55b0 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 23 Jan 2023 10:27:03 +0100 Subject: [PATCH 542/608] simplified for loop: loop directly on DataFiles of dataset, not over each version separately --- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 2c0d066b3fc..f0a33ceeb5a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2538,13 +2538,11 @@ public Response cleanStorage(@PathParam("id") String idSupplied, @QueryParam("dr List deleted = new ArrayList<>(); Set files = new HashSet(); try { - for (DatasetVersion dv : dataset.getVersions()) { - for (FileMetadata f : dv.getFileMetadatas()) { - String storageIdentifier = f.getDataFile().getStorageIdentifier(); - String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3); - String[] locationParts = location.split(":", 3);//separate bucket, swift container, etc. from fileName - files.add(locationParts[locationParts.length-1]); - } + for (DataFile dataFile: dataset.getFiles()) { + String storageIdentifier = dataFile.getStorageIdentifier(); + String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3); + String[] locationParts = location.split(":", 3);//separate bucket, swift container, etc. 
from fileName + files.add(locationParts[locationParts.length-1]); } StorageIO datasetIO = DataAccess.getStorageIO(dataset); Predicate filter = f -> { From 360b73819df4028a9fbcd1bc914ab90720f334da Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 23 Jan 2023 12:35:49 +0100 Subject: [PATCH 543/608] clean up files made safer --- .../harvard/iq/dataverse/api/Datasets.java | 49 ++++++++++------ .../iq/dataverse/dataaccess/FileAccessIO.java | 5 +- .../dataverse/dataaccess/InputStreamIO.java | 2 +- .../dataaccess/RemoteOverlayAccessIO.java | 4 +- .../iq/dataverse/dataaccess/S3AccessIO.java | 7 ++- .../iq/dataverse/dataaccess/StorageIO.java | 2 +- .../dataverse/dataaccess/SwiftAccessIO.java | 5 +- .../iq/dataverse/api/DatasetsTest.java | 58 +++++++++++++++++++ 8 files changed, 105 insertions(+), 27 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/DatasetsTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index f0a33ceeb5a..43b0c6f9529 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -121,6 +121,7 @@ import java.util.Map.Entry; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.regex.Pattern; import java.util.stream.Collectors; import javax.ejb.EJB; @@ -157,6 +158,7 @@ public class Datasets extends AbstractApiBean { private static final Logger logger = Logger.getLogger(Datasets.class.getCanonicalName()); + private static final Pattern dataFilePattern = Pattern.compile("^[0-9a-f]{11}-[0-9a-f]{12}\\.?.*"); @Inject DataverseSession session; @@ -2535,34 +2537,45 @@ public Response cleanStorage(@PathParam("id") String idSupplied, @QueryParam("dr return error(Response.Status.INTERNAL_SERVER_ERROR, "Access denied!"); } - List deleted = new ArrayList<>(); - Set files = new HashSet(); + boolean doDryRun = dryrun != null && dryrun.booleanValue(); + + // check if no legacy files are present + Set datasetFilenames = getDatasetFilenames(dataset); + if (datasetFilenames.stream().anyMatch(x -> !dataFilePattern.matcher(x).matches())) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "Dataset contains files not matching the nameing pattern!"); + } + + Predicate filter = getToDeleteFilesFilter(datasetFilenames); + List deleted; try { - for (DataFile dataFile: dataset.getFiles()) { - String storageIdentifier = dataFile.getStorageIdentifier(); - String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3); - String[] locationParts = location.split(":", 3);//separate bucket, swift container, etc. from fileName - files.add(locationParts[locationParts.length-1]); - } StorageIO datasetIO = DataAccess.getStorageIO(dataset); - Predicate filter = f -> { - return !f.startsWith("export_") || files.stream().noneMatch(x -> f.startsWith(x)); - }; - - if (dryrun != null && dryrun.booleanValue()) { - deleted.addAll(files.stream().filter(filter).collect(Collectors.toList())); - } else { - deleted.addAll(datasetIO.cleanUp(filter)); - } + deleted = datasetIO.cleanUp(filter, doDryRun); } catch (IOException ex) { logger.log(Level.SEVERE, null, ex); return error(Response.Status.INTERNAL_SERVER_ERROR, "IOException! Serious Error! 
See administrator!"); } - return ok("Found: " + files.stream().collect(Collectors.joining(", ")) + "\n" + "Deleted: " + deleted.stream().collect(Collectors.joining(", "))); + return ok("Found: " + datasetFilenames.stream().collect(Collectors.joining(", ")) + "\n" + "Deleted: " + deleted.stream().collect(Collectors.joining(", "))); } + private static Set getDatasetFilenames(Dataset dataset) { + Set files = new HashSet<>(); + for (DataFile dataFile: dataset.getFiles()) { + String storageIdentifier = dataFile.getStorageIdentifier(); + String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3); + String[] locationParts = location.split(":", 3);//separate bucket, swift container, etc. from fileName + files.add(locationParts[locationParts.length-1]); + } + return files; + } + + public static Predicate getToDeleteFilesFilter(Set datasetFilenames) { + return f -> { + return dataFilePattern.matcher(f).matches() && datasetFilenames.stream().noneMatch(x -> f.startsWith(x)); + }; + } + private void msg(String m) { //System.out.println(m); logger.fine(m); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index cc72a9cfb02..8ee3f0cf53c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -726,8 +726,11 @@ private void deleteFile(String fileName) throws IOException { } @Override - public List cleanUp(Predicate filter) throws IOException { + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + if (dryRun) { + return toDelete; + } for (String f : toDelete) { this.deleteFile(f); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index 2a867bddcac..be6f9df0254 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -161,7 +161,7 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { } @Override - public List cleanUp(Predicate filter) throws IOException { + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { throw new UnsupportedDataAccessOperationException("InputStreamIO: tthis method is not supported in this DataAccess driver."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 22373fdfee0..66c6a4cc2ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -633,7 +633,7 @@ public static String getBaseStoreIdFor(String driverId) { } @Override - public List cleanUp(Predicate filter) throws IOException { - return baseStore.cleanUp(filter); + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + return baseStore.cleanUp(filter, dryRun); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 8dc93361375..f396b07d788 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1309,8 +1309,6 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { return true; } - - private List listAllFiles() throws IOException { if (!this.canWrite()) { open(); @@ -1372,8 +1370,11 @@ private void deleteFile(String fileName) throws IOException { } @Override - public List cleanUp(Predicate filter) throws IOException { + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + if (dryRun) { + return toDelete; + } for (String f : toDelete) { this.deleteFile(f); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 54e457ffab6..bfd5c5f0d8f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -623,6 +623,6 @@ protected static boolean usesStandardNamePattern(String identifier) { return m.find(); } - public abstract List cleanUp(Predicate filter) throws IOException; + public abstract List cleanUp(Predicate filter, boolean dryRun) throws IOException; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 8857b054108..6c84009de3e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -944,8 +944,11 @@ private void deleteFile(String fileName) throws IOException { } @Override - public List cleanUp(Predicate filter) throws IOException { + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + if (dryRun) { + return toDelete; + } for (String f : toDelete) { this.deleteFile(f); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsTest.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsTest.java new file mode 100644 index 00000000000..fded590d9db --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsTest.java @@ -0,0 +1,58 @@ +package edu.harvard.iq.dataverse.api; + +import org.junit.Test; +import java.util.HashSet; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class DatasetsTest { + + /** + * Test cleanup filter + */ + @Test + public void testCleanup() { + Set datasetFiles = new HashSet<>() { + { + add("1837fda0b6c-90779481d439"); + add("1837fda0e17-4b0926f6d44e"); + add("1837fda1b80-46a899909269"); + } + }; + Set filesOnDrive = new HashSet<>() { + { + add("1837fda0b6c-90779481d439"); + add("1837fda0e17-4b0926f6d44e"); + add("1837fda1b80-46a899909269"); + add("prefix_1837fda0b6c-90779481d439"); + add("1837fda0e17-4b0926f6d44e_suffix"); + add("1837fda1b80-extra-46a899909269"); + add("1837fda0e17-4b0926f6d44e.aux"); + add("1837fda1994-5f74d57e6e47"); + add("1837fda17ce-d7b9987fc6e9"); + add("18383198c49-aeda08ccffff"); + add("prefix_1837fda1994-5f74d57e6e47"); + add("1837fda17ce-d7b9987fc6e9_suffix"); + add("18383198c49-extra-aeda08ccffff"); + add("some_other_file"); + add("1837fda17ce-d7b9987fc6e9.aux"); + add("18383198c49.aeda08ccffff"); + add("1837fda17ce-d7b9987fc6xy"); + } + }; + + 
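+        // Expected: the filter flags only files that look like storage-identifier names but
+        // whose base (e.g. "1837fda1994-5f74d57e6e47") is not registered in the dataset,
+        // including "_suffix" and ".aux" variants of unknown bases, while leaving registered
+        // files, prefixed names, and legacy names like "some_other_file" untouched: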
Predicate toDeleteFilesFilter = Datasets.getToDeleteFilesFilter(datasetFiles); + Set deleted = filesOnDrive.stream().filter(toDeleteFilesFilter).collect(Collectors.toSet()); + + assertEquals(5, deleted.size()); + assertTrue(deleted.contains("1837fda1994-5f74d57e6e47")); + assertTrue(deleted.contains("1837fda17ce-d7b9987fc6e9")); + assertTrue(deleted.contains("18383198c49-aeda08ccffff")); + assertTrue(deleted.contains("1837fda17ce-d7b9987fc6e9_suffix")); + assertTrue(deleted.contains("1837fda17ce-d7b9987fc6e9.aux")); + } +} From 7749b01995dd37895a0ca01162322268562aab84 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 23 Jan 2023 10:36:06 -0500 Subject: [PATCH 544/608] The remaining, mostly finalized changes for the "custom header" feature for OAI harvesting (#9231) --- .../source/admin/harvestclients.rst | 2 + doc/sphinx-guides/source/api/native-api.rst | 4 +- modules/dataverse-parent/pom.xml | 7 +- .../iq/dataverse/HarvestingClientsPage.java | 13 +- .../harvest/client/FastGetRecord.java | 124 +++++---- .../harvest/client/HarvesterServiceBean.java | 12 +- .../client/oai/CustomJdkHttpXoaiClient.java | 259 ------------------ .../harvest/client/oai/OaiHandler.java | 41 +-- src/main/java/propertyFiles/Bundle.properties | 4 +- src/main/webapp/harvestclients.xhtml | 2 +- 10 files changed, 112 insertions(+), 356 deletions(-) delete mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java diff --git a/doc/sphinx-guides/source/admin/harvestclients.rst b/doc/sphinx-guides/source/admin/harvestclients.rst index e94a6aa1730..37204003026 100644 --- a/doc/sphinx-guides/source/admin/harvestclients.rst +++ b/doc/sphinx-guides/source/admin/harvestclients.rst @@ -21,6 +21,8 @@ Clients are managed on the "Harvesting Clients" page accessible via the :doc:`da The process of creating a new, or editing an existing client, is largely self-explanatory. It is split into logical steps, in a way that allows the user to go back and correct the entries made earlier. The process is interactive and guidance text is provided. For example, the user is required to enter the URL of the remote OAI server. When they click *Next*, the application will try to establish a connection to the server in order to verify that it is working, and to obtain the information about the sets of metadata records and the metadata formats it supports. The choices offered to the user on the next page will be based on this extra information. If the application fails to establish a connection to the remote archive at the address specified, or if an invalid response is received, the user is given an opportunity to check and correct the URL they entered. +Note that as of 5.13, a new entry "Custom HTTP Header" has been added to the Step 1. of Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. It is however on the very first, Step 1. screen in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which need to be sent in the Step 2. of creating or editing a client. 
Multiple headers can be supplied separated by `\n` - actual "backslash" and "n" characters, not a single "new line" character. + How to Stop a Harvesting Run in Progress ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 589b947f15e..609f1487177 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -3296,7 +3296,8 @@ The following optional fields are supported: - archiveDescription: What the name suggests. If not supplied, will default to "This Dataset is harvested from our partners. Clicking the link will take you directly to the archival source of the data." - set: The OAI set on the remote server. If not supplied, will default to none, i.e., "harvest everything". - style: Defaults to "default" - a generic OAI archive. (Make sure to use "dataverse" when configuring harvesting from another Dataverse installation). - +- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\n` - actual "backslash" and "n" characters, not a single "new line" character. + Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored. For example, as of writing this there is no way to configure a harvesting schedule via this API. An example JSON file would look like this:: @@ -3308,6 +3309,7 @@ An example JSON file would look like this:: "archiveUrl": "https://zenodo.org", "archiveDescription": "Moissonné depuis la collection LMOPS de l'entrepôt Zenodo. En cliquant sur ce jeu de données, vous serez redirigé vers Zenodo.", "metadataFormat": "oai_dc", + "customHeaders": "x-oai-api-key: xxxyyyzzz", "set": "user-lmops" } diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 3911e9d5bbb..600741dc972 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -164,7 +164,8 @@ 4.4.14 - 5.0.0-RC2 + + 5.0.0-SNAPSHOT 1.15.0 @@ -324,7 +325,7 @@ Local repository for hosting jars not available from network repositories. 
file://${project.basedir}/local_lib - oss-sonatype oss-sonatype @@ -335,7 +336,7 @@ true - --> + diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java index 4430a7be73a..5be7578f7f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java @@ -9,7 +9,6 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteHarvestingClientCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand; import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; @@ -24,7 +23,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Locale; import java.util.Collections; import java.util.logging.Level; import java.util.logging.Logger; @@ -557,6 +555,9 @@ public boolean validateServerUrlOAI() { if (!StringUtils.isEmpty(getNewHarvestingUrl())) { OaiHandler oaiHandler = new OaiHandler(getNewHarvestingUrl()); + if (getNewCustomHeader() != null) { + oaiHandler.setCustomHeaders(oaiHandler.makeCustomHeaders(getNewCustomHeader())); + } boolean success = true; String message = null; @@ -639,11 +640,11 @@ public boolean validateServerUrlOAI() { } public boolean validateCustomHeader() { - if (!StringUtils.isEmpty(getCustomHeader())) { + if (!StringUtils.isEmpty(getNewCustomHeader())) { // TODO: put this method somewhere else as a static utility // check that it's looking like "{header-name}: {header value}" at least - if (!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getCustomHeader())) { + if (!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getNewCustomHeader())) { FacesContext.getCurrentInstance().addMessage(getNewClientCustomHeaderInputField().getClientId(), new FacesMessage(FacesMessage.SEVERITY_ERROR, "", BundleUtil.getStringFromBundle("harvestclients.newClientDialog.customHeader.invalid"))); @@ -786,11 +787,11 @@ public void setNewHarvestingUrl(String newHarvestingUrl) { this.newHarvestingUrl = newHarvestingUrl; } - public String getCustomHeader() { + public String getNewCustomHeader() { return customHeader; } - public void setCustomHeader(String customHeader) { + public void setNewCustomHeader(String customHeader) { this.customHeader = customHeader; } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java index c5e3a93e2df..402d0d8ef91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java @@ -19,8 +19,8 @@ */ package edu.harvard.iq.dataverse.harvest.client; +import edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler; import java.io.IOException; -import java.io.FileNotFoundException; import java.io.InputStream; import java.io.StringReader; @@ -31,9 +31,14 @@ import java.io.FileOutputStream; import java.io.PrintWriter; -import java.net.HttpURLConnection; +import static java.net.HttpURLConnection.HTTP_OK; import java.net.MalformedURLException; -import java.net.URL; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import 
java.net.http.HttpResponse; +import java.util.Map; +import java.util.Optional; import java.util.zip.GZIPInputStream; import java.util.zip.InflaterInputStream; @@ -84,17 +89,18 @@ public class FastGetRecord { /** * Client-side GetRecord verb constructor * - * @param baseURL the baseURL of the server to be queried + * @param oaiHandler the configured OaiHandler running this harvest + * @param identifier Record identifier + * @param httpClient JDK HttpClient used to make HTTP requests * @exception MalformedURLException the baseURL is bad * @exception SAXException the xml response is bad * @exception IOException an I/O error occurred + * @exception TransformerException if it fails to parse the service portion of the record */ - public FastGetRecord(String baseURL, String identifier, String metadataPrefix) - throws IOException, ParserConfigurationException, SAXException, + public FastGetRecord(OaiHandler oaiHandler, String identifier, HttpClient httpClient) throws IOException, ParserConfigurationException, SAXException, TransformerException { - harvestRecord (baseURL, identifier, metadataPrefix); - + harvestRecord (oaiHandler.getBaseOaiUrl(), identifier, oaiHandler.getMetadataPrefix(), oaiHandler.getCustomHeaders(), httpClient); } private String errorMessage = null; @@ -117,57 +123,63 @@ public boolean isDeleted () { } - public void harvestRecord(String baseURL, String identifier, String metadataPrefix) throws IOException, - ParserConfigurationException, SAXException, TransformerException { + public void harvestRecord(String baseURL, String identifier, String metadataPrefix, Map<String, String> customHeaders, HttpClient httpClient) throws IOException, + ParserConfigurationException, SAXException, TransformerException { xmlInputFactory = javax.xml.stream.XMLInputFactory.newInstance(); String requestURL = getRequestURL(baseURL, identifier, metadataPrefix); + InputStream in; + + // This was one other place where the Harvester code was still using + // the obsolete java.net.HttpURLConnection that didn't get replaced with + // the new java.net.http.HttpClient during the first pass of the XOAI + // rewrite. (L.A.) - InputStream in = null; - URL url = new URL(requestURL); - HttpURLConnection con = null; - int responseCode = 0; - - con = (HttpURLConnection) url.openConnection(); - con.setRequestProperty("User-Agent", "Dataverse Harvesting Client v5"); - con.setRequestProperty("Accept-Encoding", - "compress, gzip, identify"); - try { - responseCode = con.getResponseCode(); - //logger.debug("responseCode=" + responseCode); - } catch (FileNotFoundException e) { - //logger.info(requestURL, e); - responseCode = HttpURLConnection.HTTP_UNAVAILABLE; - } - - // TODO: -- L.A. - // - // support for cookies; - // support for limited retry attempts -- ? - // implement reading of the stream as filterinputstream -- ? - // -- that could make it a little faster still. -- L.A.
- - - - if (responseCode == 200) { - - String contentEncoding = con.getHeaderField("Content-Encoding"); - //logger.debug("contentEncoding=" + contentEncoding); - - // support for the standard compress/gzip/deflate compression - // schemes: - if ("compress".equals(contentEncoding)) { - ZipInputStream zis = new ZipInputStream(con.getInputStream()); - zis.getNextEntry(); - in = zis; - } else if ("gzip".equals(contentEncoding)) { - in = new GZIPInputStream(con.getInputStream()); - } else if ("deflate".equals(contentEncoding)) { - in = new InflaterInputStream(con.getInputStream()); - } else { - in = con.getInputStream(); + if (httpClient == null) { + throw new IOException("Null Http Client, cannot make a GetRecord call to obtain the metadata."); + } + + HttpRequest.Builder requestBuilder = HttpRequest.newBuilder() + .uri(URI.create(requestURL)) + .GET() + .header("User-Agent", "XOAI Service Provider v5 (Dataverse)") + .header("Accept-Encoding", "compress, gzip"); + + if (customHeaders != null) { + for (String headerName : customHeaders.keySet()) { + requestBuilder.header(headerName, customHeaders.get(headerName)); + } + } + + HttpRequest request = requestBuilder.build(); + HttpResponse<InputStream> response; + + try { + response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IOException("Failed to connect to the remote dataverse server to obtain GetRecord metadata"); + } + + int responseCode = response.statusCode(); + + if (responseCode == HTTP_OK) { + InputStream inputStream = response.body(); + Optional<String> contentEncoding = response.headers().firstValue("Content-Encoding"); + + // support for the standard gzip encoding: + in = inputStream; + if (contentEncoding.isPresent()) { + if (contentEncoding.get().equals("compress")) { + ZipInputStream zis = new ZipInputStream(inputStream); + zis.getNextEntry(); + in = zis; + } else if (contentEncoding.get().equals("gzip")) { + in = new GZIPInputStream(inputStream); + } else if (contentEncoding.get().equals("deflate")) { + in = new InflaterInputStream(inputStream); + } } // We are going to read the OAI header and SAX-parse it for the @@ -185,9 +197,7 @@ public void harvestRecord(String baseURL, String identifier, String metadataPref FileOutputStream tempFileStream = null; PrintWriter metadataOut = null; - savedMetadataFile = File.createTempFile("meta", ".tmp"); - - + savedMetadataFile = File.createTempFile("meta", ".tmp"); int mopen = 0; int mclose = 0; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index 0e9ffb20653..40bd45ecb30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -228,11 +228,9 @@ private void harvestOAI(DataverseRequest dataverseRequest, HarvestingClient harv throw new IOException(errorMessage); } - if (DATAVERSE_PROPRIETARY_METADATA_FORMAT.equals(oaiHandler.getMetadataPrefix())) { - // If we are harvesting native Dataverse json, we'll also need this - // jdk http client to make direct calls to the remote Dataverse API: - httpClient = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.ALWAYS).build(); - } + // We will use this jdk http client to make direct calls to the remote + // OAI (or remote Dataverse API) to obtain the metadata records + httpClient = 
HttpClient.newBuilder().followRedirects(HttpClient.Redirect.ALWAYS).build(); try { for (Iterator<String>
idIter = oaiHandler.runListIdentifiers(); idIter.hasNext();) { @@ -295,7 +293,7 @@ private Long processRecord(DataverseRequest dataverseRequest, Logger hdLogger, P tempFile = retrieveProprietaryDataverseMetadata(httpClient, metadataApiUrl); } else { - FastGetRecord record = oaiHandler.runGetRecord(identifier); + FastGetRecord record = oaiHandler.runGetRecord(identifier, httpClient); errMessage = record.getErrorMessage(); deleted = record.isDeleted(); tempFile = record.getMetadataFile(); @@ -360,7 +358,7 @@ File retrieveProprietaryDataverseMetadata (HttpClient client, String remoteApiUr HttpRequest request = HttpRequest.newBuilder() .uri(URI.create(remoteApiUrl)) .GET() - .header("User-Agent", "Dataverse Harvesting Client v5") + .header("User-Agent", "XOAI Service Provider v5 (Dataverse)") .build(); HttpResponse response; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java deleted file mode 100644 index 25c3a048219..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java +++ /dev/null @@ -1,259 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package edu.harvard.iq.dataverse.harvest.client.oai; - -import io.gdcc.xoai.serviceprovider.client.OAIClient; - -import io.gdcc.xoai.serviceprovider.exceptions.OAIRequestException; -import io.gdcc.xoai.serviceprovider.parameters.Parameters; -import java.io.IOException; -import java.io.InputStream; -import static java.net.HttpURLConnection.HTTP_OK; -import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.net.http.HttpClient; -import java.net.http.HttpRequest; -import java.net.http.HttpResponse; -import java.nio.charset.StandardCharsets; -import java.security.KeyManagementException; -import java.security.NoSuchAlgorithmException; -import java.security.cert.X509Certificate; -import java.time.Duration; -import java.util.List; -import java.util.ListIterator; -import javax.net.ssl.SSLContext; -import javax.net.ssl.TrustManager; -import javax.net.ssl.X509TrustManager; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.http.Header; - -/** - * Sane default OAI Client implementation using JDK HTTP Client. Can only be used via builder in - * calling code. - * (this is essentially a copy of the final class JdkHttpOaiClient provided by - * gdcc.xoai, with the custom http headers added. proof of concept! - */ -public final class CustomJdkHttpXoaiClient extends OAIClient { - - private static final Logger log = LoggerFactory.getLogger(OAIClient.class.getCanonicalName()); - - // As these vars will be feed via the builder and those provide defaults and null-checks, - // we may assume FOR INTERNAL USE these are not null. - private final String baseUrl; - private final String userAgent; - private final Duration requestTimeout; - private final HttpClient httpClient; - // Custom headers are optional though, ok to be null: - private final List
customHeaders; - - - CustomJdkHttpXoaiClient( - String baseUrl, String userAgent, Duration requestTimeout, List
customHeaders, HttpClient httpClient) { - this.baseUrl = baseUrl; - this.userAgent = userAgent; - this.requestTimeout = requestTimeout; - this.httpClient = httpClient; - this.customHeaders = customHeaders; - } - - @Override - public InputStream execute(Parameters parameters) throws OAIRequestException { - try { - URI requestURI = URI.create(parameters.toUrl(this.baseUrl)); - - HttpRequest.Builder httpRequestBuilder = HttpRequest.newBuilder() - .uri(requestURI) - .GET() - .header("User-Agent", this.userAgent) - .timeout(requestTimeout); - - // add custom headers, if present: - if (customHeaders != null) { - ListIterator<Header>
iterator = customHeaders.listIterator(); - while (iterator.hasNext()) { - Header customHeader = iterator.next(); - httpRequestBuilder.header(customHeader.getName(), customHeader.getValue()); - } - } - - HttpRequest request = httpRequestBuilder.build(); - - HttpResponse response = - this.httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); - - if (response.statusCode() == HTTP_OK) { - return response.body(); - } else { - // copy body of the response to string and send as exception message - throw new OAIRequestException( - "Query faild with status code " - + response.statusCode() - + ": " - + new String( - response.body().readAllBytes(), StandardCharsets.UTF_8)); - } - } catch (IllegalArgumentException | IOException | InterruptedException ex) { - // Hint by SonarCloud: - // https://sonarcloud.io/organizations/gdcc/rules?open=java%3AS2142&rule_key=java%3AS2142 - Thread.currentThread().interrupt(); - throw new OAIRequestException(ex); - } - } - - /*@Override - JdkHttpBuilder newBuilder() { - return new CustomJdkHttpXoaiClient.JdkHttpBuilder(); - }*/ - - /** - * Build an {@link OAIClient} using the JDK native HTTP client. You may use your own prepared - * {@link HttpClient.Builder} instead of the default one. - * - *
<p>
Provides defaults for request timeouts (60s) and user agent. Remember to set the base - * OAI-PMH URL via {@link #withBaseUrl(URL)}. An exception will occur on first request - * otherwise. - */ - public static final class JdkHttpBuilder implements OAIClient.Builder { - private String baseUrl = "Must be set via Builder.withBaseUrl()"; - private String userAgent = "XOAI Service Provider v5"; - private Duration requestTimeout = Duration.ofSeconds(60); - private List
<Header>
customHeaders = null; - private final HttpClient.Builder httpClientBuilder; - - JdkHttpBuilder() { - this.httpClientBuilder = HttpClient.newBuilder(); - } - - /** - * While the default constructor can be accessed via {@link OAIClient#newBuilder()}, if - * someone provides a {@link HttpClient.Builder} (which might already contain - * configuration), happily work with it. - * - * @param httpClientBuilder Any (preconfigured) Java 11+ HTTP client builder - */ - public JdkHttpBuilder(HttpClient.Builder httpClientBuilder) { - this.httpClientBuilder = httpClientBuilder; - } - - @Override - public JdkHttpBuilder withBaseUrl(URL baseUrl) { - return this.withBaseUrl(baseUrl.toString()); - } - - @Override - public JdkHttpBuilder withBaseUrl(String baseUrl) { - try { - new URL(baseUrl).toURI(); - if (!baseUrl.startsWith("http")) { - throw new IllegalArgumentException("OAI-PMH supports HTTP/S only"); - } - this.baseUrl = baseUrl; - return this; - } catch (MalformedURLException | URISyntaxException e) { - throw new IllegalArgumentException(e); - } - } - - @Override - public JdkHttpBuilder withConnectTimeout(Duration timeout) { - // validation is done by client builder! - httpClientBuilder.connectTimeout(timeout); - return this; - } - - @Override - public JdkHttpBuilder withRequestTimeout(Duration timeout) { - if (timeout == null || timeout.isNegative()) { - throw new IllegalArgumentException("Timeout must not be null or negative value"); - } - this.requestTimeout = timeout; - return this; - } - - @Override - public JdkHttpBuilder withUserAgent(String userAgent) { - if (userAgent == null || userAgent.isBlank()) { - throw new IllegalArgumentException("User agent must not be null or empty/blank"); - } - this.userAgent = userAgent; - return this; - } - - @Override - public JdkHttpBuilder withFollowRedirects() { - this.httpClientBuilder.followRedirects(HttpClient.Redirect.NORMAL); - return this; - } - - @Override - public JdkHttpBuilder withInsecureSSL() { - // create insecure context (switch of certificate checks) - httpClientBuilder.sslContext(insecureContext()); - - // warn if the hostname verification is still active - // (users must do this themselves - it's a global setting and might pose a security - // risk) - if (!Boolean.getBoolean("jdk.internal.httpclient.disableHostnameVerification")) { - log.warn( - "You must disable JDK HTTP Client Host Name Verification globally via" - + " system property" - + " -Djdk.internal.httpclient.disableHostnameVerification=true for" - + " XOAI Client connections to insecure SSL servers. Don't do this in" - + " a production setup!"); - } - return this; - } - - public JdkHttpBuilder withCustomHeaders(List
customHeaders) { - // This can be null, as these headers are optional - this.customHeaders = customHeaders; - return this; - } - - @Override - public CustomJdkHttpXoaiClient build() { - return new CustomJdkHttpXoaiClient( - this.baseUrl, this.userAgent, this.requestTimeout, this.customHeaders, httpClientBuilder.build()); - } - - private static SSLContext insecureContext() { - TrustManager[] noopTrustManager = - new TrustManager[] { - new X509TrustManager() { - // This is insecure by design, we warn users and they need to do sth. to - // use it. - // Safely ignore the Sonarcloud message. - @SuppressWarnings("java:S4830") - public void checkClientTrusted(X509Certificate[] xcs, String string) { - // we want to accept every certificate - intentionally left blank - } - // This is insecure by design, we warn users and they need to do sth. to - // use it. - // Safely ignore the Sonarcloud message. - @SuppressWarnings("java:S4830") - public void checkServerTrusted(X509Certificate[] xcs, String string) { - // we want to accept every certificate - intentionally left blank - } - - public X509Certificate[] getAcceptedIssuers() { - return new X509Certificate[0]; - } - } - }; - try { - SSLContext sc = SSLContext.getInstance("TLSv1.2"); - sc.init(null, noopTrustManager, null); - return sc; - } catch (KeyManagementException | NoSuchAlgorithmException ex) { - log.error("Could not build insecure SSL context. Might cause NPE.", ex); - return null; - } - } - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java index ae297416ff9..d9fa9b27c5a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java @@ -14,8 +14,10 @@ import edu.harvard.iq.dataverse.harvest.client.FastGetRecord; import static edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.DATAVERSE_PROPRIETARY_METADATA_API; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import io.gdcc.xoai.serviceprovider.client.JdkHttpOaiClient; import java.io.IOException; import java.io.Serializable; +import java.net.http.HttpClient; import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.lang3.StringUtils; @@ -23,10 +25,11 @@ import javax.xml.transform.TransformerException; import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.logging.Logger; -import org.apache.http.message.BasicHeader; /** * @@ -68,7 +71,6 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException this.fromDate = harvestingClient.getLastNonEmptyHarvestTime(); this.customHeaders = makeCustomHeaders(harvestingClient.getCustomHttpHeaders()); - //test: this.customHeaders = makeCustomHeaders("x-api-key: xxx-yyy-zzz\\ny-api-key: zzz-yyy-xxx"); this.harvestingClient = harvestingClient; } @@ -79,7 +81,7 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException private String setName; private Date fromDate; private Boolean setListTruncated = false; - private List customHeaders = null; + private Map customHeaders = null; private ServiceProvider serviceProvider; @@ -125,11 +127,11 @@ public boolean isSetListTruncated() { return setListTruncated; } - public List getCustomHeaders() { + public Map getCustomHeaders() { return this.customHeaders; } - public void setCustomHeaders(List 
customHeaders) { + public void setCustomHeaders(Map customHeaders) { this.customHeaders = customHeaders; } @@ -142,17 +144,12 @@ public ServiceProvider getServiceProvider() throws OaiHandlerException { context.withBaseUrl(baseOaiUrl); context.withGranularity(Granularity.Second); - // builds the client based on the default client provided in xoai, - // with the same default parameters and the JDK http client, with - // just the (optional) custom headers added: - // (this is proof-of-concept implementation; there gotta be a prettier way to do this) - //context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(baseOaiUrl).build()); if (getCustomHeaders() != null) { - for (org.apache.http.Header customHeader : getCustomHeaders()) { - logger.info("will add custom header; name: "+customHeader.getName()+", value: "+customHeader.getValue()); + for (String headerName : getCustomHeaders().keySet()) { + logger.info("will add custom header; name: "+headerName+", value: "+getCustomHeaders().get(headerName)); } } - context.withOAIClient((new CustomJdkHttpXoaiClient.JdkHttpBuilder()).withBaseUrl(getBaseOaiUrl()).withCustomHeaders(getCustomHeaders()).build()); + context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(getBaseOaiUrl()).withCustomHeaders(getCustomHeaders()).build()); serviceProvider = new ServiceProvider(context); } @@ -258,7 +255,7 @@ public Iterator
runListIdentifiers() throws OaiHandlerException { } - public FastGetRecord runGetRecord(String identifier) throws OaiHandlerException { + public FastGetRecord runGetRecord(String identifier, HttpClient httpClient) throws OaiHandlerException { if (StringUtils.isEmpty(this.baseOaiUrl)) { throw new OaiHandlerException("Attempted to execute GetRecord without server URL specified."); } @@ -267,7 +264,7 @@ public FastGetRecord runGetRecord(String identifier) throws OaiHandlerException } try { - return new FastGetRecord(this.baseOaiUrl, identifier, this.metadataPrefix); + return new FastGetRecord(this, identifier, httpClient); } catch (ParserConfigurationException pce) { throw new OaiHandlerException("ParserConfigurationException executing GetRecord: "+pce.getMessage()); } catch (SAXException se) { @@ -317,20 +314,24 @@ public void runIdentify() { // and to learn about its extended capabilities) } - private List makeCustomHeaders(String headersString) { + public Map makeCustomHeaders(String headersString) { if (headersString != null) { - List ret = new ArrayList<>(); String[] parts = headersString.split("\\\\n"); - + HashMap ret = new HashMap<>(); + logger.info("found "+parts.length+" parts"); + int count = 0; for (int i = 0; i < parts.length; i++) { if (parts[i].indexOf(':') > 0) { String headerName = parts[i].substring(0, parts[i].indexOf(':')); String headerValue = parts[i].substring(parts[i].indexOf(':')+1).strip(); - ret.add(new BasicHeader(headerName, headerValue)); + + ret.put(headerName, headerValue); + count++; } // simply skipping it if malformed; or we could throw an exception - ? } - if (!ret.isEmpty()) { + if (ret.size() > 0) { + logger.info("returning the array with "+ret.size()+" name/value pairs"); return ret; } } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index e2007338e08..51d9b73085b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -539,8 +539,8 @@ harvestclients.newClientDialog.nickname.required=Client nickname cannot be empty harvestclients.newClientDialog.nickname.invalid=Client nickname can contain only letters, digits, underscores (_) and dashes (-); and must be at most 30 characters. harvestclients.newClientDialog.nickname.alreadyused=This nickname is already used. harvestclients.newClientDialog.customHeader=Custom HTTP Header -harvestclients.newClientDialog.customHeader.helptext=(Optional) Custom HTTP header to add to OAI requests -harvestclients.newClientDialog.customHeader.watermark=Enter the header as in header-name: header-value +harvestclients.newClientDialog.customHeader.helptext=(Optional) Custom HTTP header to add to requests, if required by this OAI server. +harvestclients.newClientDialog.customHeader.watermark=Enter an http header, as in header-name: header-value harvestclients.newClientDialog.customHeader.invalid=Client header name can only contain letters, digits, underscores (_) and dashes (-); the entire header string must be in the form of "header-name: header-value" harvestclients.newClientDialog.type=Server Protocol harvestclients.newClientDialog.type.helptext=Only the OAI server protocol is currently supported. diff --git a/src/main/webapp/harvestclients.xhtml b/src/main/webapp/harvestclients.xhtml index a5f271e8e75..3c09ed4ecb0 100644 --- a/src/main/webapp/harvestclients.xhtml +++ b/src/main/webapp/harvestclients.xhtml @@ -287,7 +287,7 @@
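As an aside on the makeCustomHeaders() change above: the stored header string uses a literal backslash-n as the separator, and each entry needs a colon between name and value. The following is a minimal, self-contained sketch of the same splitting rules (illustrative only; the class and method names below are made up, this is not the actual OaiHandler code):

    import java.util.HashMap;
    import java.util.Map;

    public class HeaderParseDemo {

        // Mirrors the rules in OaiHandler.makeCustomHeaders(): entries are
        // separated by a literal backslash-n sequence (two characters, not
        // a newline), and each entry needs a colon between name and value.
        static Map<String, String> parse(String headersString) {
            Map<String, String> ret = new HashMap<>();
            for (String part : headersString.split("\\\\n")) {
                int colon = part.indexOf(':');
                if (colon > 0) {
                    ret.put(part.substring(0, colon), part.substring(colon + 1).strip());
                }
                // entries without a colon are silently skipped, matching the patch
            }
            return ret;
        }

        public static void main(String[] args) {
            // same placeholder values as the test line removed earlier in this series
            Map<String, String> headers = parse("x-api-key: xxx-yyy-zzz\\ny-api-key: zzz-yyy-xxx");
            System.out.println(headers); // two name/value pairs, in no particular order
        }
    }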
From 7888fcde8b78154a77e2d49375b815777b3a6d5d Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 23 Jan 2023 10:41:10 -0500 Subject: [PATCH 545/608] backslashes in the sphinx sources (#9231) --- doc/sphinx-guides/source/admin/harvestclients.rst | 2 +- doc/sphinx-guides/source/api/native-api.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/admin/harvestclients.rst b/doc/sphinx-guides/source/admin/harvestclients.rst index 37204003026..02783e4b97a 100644 --- a/doc/sphinx-guides/source/admin/harvestclients.rst +++ b/doc/sphinx-guides/source/admin/harvestclients.rst @@ -21,7 +21,7 @@ Clients are managed on the "Harvesting Clients" page accessible via the :doc:`da The process of creating a new, or editing an existing client, is largely self-explanatory. It is split into logical steps, in a way that allows the user to go back and correct the entries made earlier. The process is interactive and guidance text is provided. For example, the user is required to enter the URL of the remote OAI server. When they click *Next*, the application will try to establish a connection to the server in order to verify that it is working, and to obtain the information about the sets of metadata records and the metadata formats it supports. The choices offered to the user on the next page will be based on this extra information. If the application fails to establish a connection to the remote archive at the address specified, or if an invalid response is received, the user is given an opportunity to check and correct the URL they entered. -Note that as of 5.13, a new entry "Custom HTTP Header" has been added to the Step 1. of Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. It is however on the very first, Step 1. screen in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which need to be sent in the Step 2. of creating or editing a client. Multiple headers can be supplied separated by `\n` - actual "backslash" and "n" characters, not a single "new line" character. +Note that as of 5.13, a new entry "Custom HTTP Header" has been added to the Step 1. of Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. It is however on the very first, Step 1. screen in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which need to be sent in the Step 2. of creating or editing a client. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. 
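For example, a client that needs to send two such headers could be configured with a value along these lines (the header names and values here are placeholders, in the spirit of this guide's other examples)::

    x-api-key: xxx-yyy-zzz\ny-api-key: zzz-yyy-xxx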
How to Stop a Harvesting Run in Progress ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 609f1487177..2782f4d1d08 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -3296,7 +3296,7 @@ The following optional fields are supported: - archiveDescription: What the name suggests. If not supplied, will default to "This Dataset is harvested from our partners. Clicking the link will take you directly to the archival source of the data." - set: The OAI set on the remote server. If not supplied, will default to none, i.e., "harvest everything". - style: Defaults to "default" - a generic OAI archive. (Make sure to use "dataverse" when configuring harvesting from another Dataverse installation). -- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\n` - actual "backslash" and "n" characters, not a single "new line" character. +- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored. For example, as of writing this there is no way to configure a harvesting schedule via this API. 
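As a rough illustration of the customHeaders field described above, a create-client call might look like the sketch below. The endpoint path, nickname, and JSON field values are illustrative placeholders patterned on this guide's conventions, not a verbatim copy of the documented example::

    curl -H "X-Dataverse-key: $API_TOKEN" -X POST -H "Content-Type: application/json" \
      "$SERVER_URL/api/harvest/clients/myClient" \
      -d '{"dataverseAlias": "subDataverse1", "harvestUrl": "https://remote.example.edu/oai", "metadataFormat": "oai_dc", "style": "default", "customHeaders": "x-oai-api-key: xxx-yyy-zzz"}'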
From 3f43b199eb6fd9a0658317de16d3a9b1159b8266 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 23 Jan 2023 15:06:34 -0500 Subject: [PATCH 546/608] a 3 line fix for the broken "earliest date" (#9309) --- .../harvest/server/OAIRecordServiceBean.java | 13 +++++++++++++ .../harvest/server/web/servlet/OAIServlet.java | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java index 6cdc4e5c277..3cbfe313504 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java @@ -32,6 +32,7 @@ import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; +import javax.persistence.Query; import javax.persistence.TypedQuery; import javax.persistence.TemporalType; @@ -375,4 +376,16 @@ public List findDeletedOaiRecordsBySetName(String setName) { } } + public Instant getEarliestDate() { + String queryString = "SELECT min(r.lastUpdateTime) FROM OAIRecord r"; + TypedQuery query = em.createQuery(queryString, Date.class); + Date retDate = query.getSingleResult(); + if (retDate != null) { + return retDate.toInstant(); + } + + // if there are no records yet, return the default "now" + return new Date().toInstant(); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 5d0580708a9..f966b30311b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -207,7 +207,8 @@ private RepositoryConfiguration createRepositoryConfiguration() { .withDeleteMethod(DeletedRecord.TRANSIENT) .withMaxListIdentifiers(maxListIdentifiers) .withMaxListRecords(maxListRecords) - .withMaxListSets(maxListSets); + .withMaxListSets(maxListSets) + .withEarliestDate(recordService.getEarliestDate()); return repositoryConfiguration; } From 6873ad90f9760c3de35178a761568127b16004b8 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 23 Jan 2023 16:08:03 -0500 Subject: [PATCH 547/608] #8339 allow drafts; fix bundle --- .../edu/harvard/iq/dataverse/api/Files.java | 44 +++++++++++++++---- src/main/java/propertyFiles/Bundle.properties | 1 + 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index ccd8f67fa0c..9a43932dba6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -450,10 +450,22 @@ public Response updateFileMetadata(@FormDataParam("jsonData") String jsonData, .build(); } + @GET + @Path("{id}/draft") + public Response getFileDataDraft(@PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { + return getFileDataResponse(fileIdOrPersistentId, uriInfo, headers, response, true); + } + @GET @Path("{id}") public Response getFileData(@PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { + return 
getFileDataResponse(fileIdOrPersistentId, uriInfo, headers, response, false); + } + + private Response getFileDataResponse(String fileIdOrPersistentId, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response, boolean draft ){ + DataverseRequest req; + try { req = createDataverseRequest(findUserOrDie()); } catch (Exception e) { @@ -465,22 +477,37 @@ public Response getFileData(@PathParam("id") String fileIdOrPersistentId, @Conte } catch (Exception e) { return error(BAD_REQUEST, "Error attempting get the requested data file."); } + FileMetadata fm; - //first get latest published - //if not available get draft if permissible - try { - - fm = df.getLatestPublishedFileMetadata(); - } catch (UnsupportedOperationException e) { + if (draft) { try { fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); } catch (WrappedResponse w) { return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); } if (null == fm) { - return error(BAD_REQUEST, "No draft availabile for this dataset"); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); } + } else { + //first get latest published + //if not available get draft if permissible + + try { + + fm = df.getLatestPublishedFileMetadata(); + + } catch (UnsupportedOperationException e) { + try { + fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); + } catch (WrappedResponse w) { + return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); + } + if (null == fm) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); + } + } + } if (fm.getDatasetVersion().isReleased()) { @@ -523,7 +550,7 @@ public Response getFileMetadata(@PathParam("id") String fileIdOrPersistentId, @P return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset." ); } if(null == fm) { - return error(BAD_REQUEST, "No draft availabile for this dataset"); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); } } else { fm = df.getLatestPublishedFileMetadata(); @@ -539,6 +566,7 @@ public Response getFileMetadata(@PathParam("id") String fileIdOrPersistentId, @P .type(MediaType.TEXT_PLAIN) //Our plain text string is already json .build(); } + @GET @Path("{id}/metadata/draft") public Response getFileMetadataDraft(@PathParam("id") String fileIdOrPersistentId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, Boolean getDraft) throws WrappedResponse, Exception { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 4166ab78a39..f55a0636126 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2552,6 +2552,7 @@ admin.api.deleteUser.success=Authenticated User {0} deleted. #Files.java files.api.metadata.update.duplicateFile=Filename already exists at {0} +files.api.no.draft=No draft available for this file #Datasets.java datasets.api.updatePIDMetadata.failure.dataset.must.be.released=Modify Registration Metadata must be run on a published dataset. 
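For reference, the new endpoint added in the patch above can be exercised along these lines (the file id and API token are placeholders, following the conventions used elsewhere in the guides)::

    curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" http://localhost:8080/api/files/42/draft

The plain GET /api/files/$ID call keeps its previous behavior: it returns the latest published file metadata first, and falls back to the draft only when no published version exists.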
From cb4765d042b64023bda4acf8bc47a149655682da Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 23 Jan 2023 16:27:07 -0500 Subject: [PATCH 548/608] Checked in something earlier that is prone to null pointers, due to a change in behavior in the latest gdcc.xoai - that I knew, but had forgotten about over the weekend. (#9231) --- .../iq/dataverse/harvest/client/oai/OaiHandler.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java index d9fa9b27c5a..bb3dc06972c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java @@ -144,12 +144,15 @@ public ServiceProvider getServiceProvider() throws OaiHandlerException { context.withBaseUrl(baseOaiUrl); context.withGranularity(Granularity.Second); + + JdkHttpOaiClient.Builder xoaiClientBuilder = JdkHttpOaiClient.newBuilder().withBaseUrl(getBaseOaiUrl()); if (getCustomHeaders() != null) { for (String headerName : getCustomHeaders().keySet()) { - logger.info("will add custom header; name: "+headerName+", value: "+getCustomHeaders().get(headerName)); - } + logger.fine("adding custom header; name: "+headerName+", value: "+getCustomHeaders().get(headerName)); + } + xoaiClientBuilder = xoaiClientBuilder.withCustomHeaders(getCustomHeaders()); } - context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(getBaseOaiUrl()).withCustomHeaders(getCustomHeaders()).build()); + context.withOAIClient(xoaiClientBuilder.build()); serviceProvider = new ServiceProvider(context); } From 87c31d7a9a88c432b6ed71a424089aa76346bb78 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 23 Jan 2023 17:42:42 -0500 Subject: [PATCH 549/608] #8724 fix paths for new dataset --- .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index f9a3cbf5633..e73cce8acbe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1662,7 +1662,7 @@ private List retrieveDVOPaths(DvObject dvo) { logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex); } List dataversePaths = getDataversePathsFromSegments(dataverseSegments); - if (dataversePaths.size() > 0) { + if (dataversePaths.size() > 0 && dvo.isInstanceofDataverse()) { // removing the dataverse's own id from the paths // fixes bug where if my parent dv was linked my dv was shown as linked to myself dataversePaths.remove(dataversePaths.size() - 1); From dc9a2972c8a7b62b9d3f5ac2dbeddbba91a60ec0 Mon Sep 17 00:00:00 2001 From: Anthony Reyes Date: Mon, 23 Jan 2023 21:02:46 -0800 Subject: [PATCH 550/608] Added suggestions from #9265 I added changes suggested by @qqmyers from #9265 as well as a small change to prevent a horizontal scrollbar from appearing. 
--- src/main/java/propertyFiles/Bundle.properties | 2 +- src/main/webapp/dataset.xhtml | 13 +++-- src/main/webapp/dataverse.xhtml | 56 +++++-------------- .../resources/js/dv_rebind_bootstrap_ui.js | 4 +- 4 files changed, 24 insertions(+), 51 deletions(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index f7b46c308f5..e8238e79267 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -73,7 +73,7 @@ delete=Delete copyClipboard=Copy to Clipboard truncateMoreBtn=Read full {0} [+] truncateMoreTip=Click to read the full {0}. -truncateLessBtn=Collapse {0} [+] +truncateLessBtn=Collapse {0} [-] truncateLessTip=Click to collapse the {0}. yes=Yes no=No diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 1bb862721a5..4d5e0850083 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -1789,6 +1789,7 @@ +