Skip to content

Commit

Permalink
Merge pull request #65 from UNC-Libraries/bxc-4130-aggregate
Browse files Browse the repository at this point in the history
BXC-4130 - Add aggregate mappings
  • Loading branch information
sharonluong authored Aug 23, 2023
2 parents c4027e4 + cf4f1c6 commit 18a4314
Show file tree
Hide file tree
Showing 24 changed files with 1,223 additions and 91 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package edu.unc.lib.boxc.migration.cdm;

import edu.unc.lib.boxc.migration.cdm.exceptions.MigrationException;
import edu.unc.lib.boxc.migration.cdm.model.MigrationProject;
import edu.unc.lib.boxc.migration.cdm.options.AggregateFileMappingOptions;
import edu.unc.lib.boxc.migration.cdm.options.Verbosity;
import edu.unc.lib.boxc.migration.cdm.services.AggregateFileMappingService;
import edu.unc.lib.boxc.migration.cdm.services.CdmIndexService;
import edu.unc.lib.boxc.migration.cdm.services.MigrationProjectFactory;
import edu.unc.lib.boxc.migration.cdm.validators.AggregateFilesValidator;
import edu.unc.lib.boxc.migration.cdm.validators.SourceFilesValidator;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import picocli.CommandLine;

import java.io.IOException;
import java.nio.file.Path;
import java.util.List;

import static edu.unc.lib.boxc.migration.cdm.model.MigrationProject.AGGREGATE_BOTTOM_MAPPING_FILENAME;
import static edu.unc.lib.boxc.migration.cdm.model.MigrationProject.AGGREGATE_TOP_MAPPING_FILENAME;
import static edu.unc.lib.boxc.migration.cdm.util.CLIConstants.outputLogger;
import static org.slf4j.LoggerFactory.getLogger;

/**
* @author bbpennel
*/
@CommandLine.Command(name = "aggregate_files",
description = "Commands related to aggregate file mappings")
public class AggregateFilesCommand {
private static final Logger log = getLogger(AggregateFilesCommand.class);

@CommandLine.ParentCommand
private CLIMain parentCommand;

private MigrationProject project;
private AggregateFileMappingService aggregateService;
private CdmIndexService indexService;

@CommandLine.Command(name = "generate",
description = {
"Generate an aggregate file mapping for this project, mapping compound or grouped works to files.",
"By default will produce a mapping for files to add to the top of a work.",
"To add aggregate files to the bottom of works, use the --sort-bottom flag.",
"Individual projects can include both top and bottom mappings. Respectively, they are stored to "
+ AGGREGATE_TOP_MAPPING_FILENAME + " and " + AGGREGATE_BOTTOM_MAPPING_FILENAME + ".",
"If multiple files are mapped to the same object across separate runs, they will be sorted in "
+ "order added, either at the top or bottom of the work. "
+ "So, earlier added sorts before later within each section.",
"Mappings are produced by listing files from a directory using the --base-path option, "
+ "then searching for matches between those filenames and some filename field in the "
+ "exported CDM records.",
"The filename field is set using the --field-name option.",
"If the value of the filename field does not match the name of the source file, the filename "
+ " can be transformed using regular expressions via the --field-pattern"
+ " and --field-pattern options.",
"The resulting will be written to the source_files.csv for this project, unless "
+ "the --dry-run flag is provided."})
public int generate(@CommandLine.Mixin AggregateFileMappingOptions options) throws Exception {
long start = System.nanoTime();

try {
validateOptions(options);
initialize(options.isSortBottom());

aggregateService.generateMapping(options);
outputLogger.info("Aggregate file mapping generated for {} in {}s", project.getProjectName(),
(System.nanoTime() - start) / 1e9);
return 0;
} catch (MigrationException | IllegalArgumentException e) {
outputLogger.info("Cannot generate aggregate mapping: {}", e.getMessage());
return 1;
} catch (Exception e) {
log.error("Failed to map aggregate files", e);
outputLogger.info("Failed to map aggregate files: {}", e.getMessage(), e);
return 1;
}
}

@CommandLine.Command(name = "validate",
description = "Validate a aggregate file mappings for this project. Defaults to top mapping.")
public int validate(@CommandLine.Option(names = { "-f", "--force" },
description = "Ignore incomplete mappings") boolean force,
@CommandLine.Option(names = { "--sort-bottom" },
description = "Validate bottom sort mapping") boolean sortBottom) throws Exception {
String mappingName = (sortBottom ? "Bottom" : "Top") + " aggregate file mappings";
try {
initialize(sortBottom);
var validator = new AggregateFilesValidator(sortBottom);
validator.setProject(project);
List<String> errors = validator.validateMappings(force);

var mappingPath = sortBottom ? project.getAggregateBottomMappingPath()
: project.getAggregateTopMappingPath();
if (errors.isEmpty()) {
outputLogger.info("PASS: {} at path {} is valid",
mappingName, mappingPath);
return 0;
} else {
if (parentCommand.getVerbosity().equals(Verbosity.QUIET)) {
outputLogger.info("FAIL: {} is invalid with {} errors",
mappingName, errors.size());
} else {
outputLogger.info("FAIL: {} at path {} is invalid due to the following issues:",
mappingName, mappingPath);
for (String error : errors) {
outputLogger.info(" - " + error);
}
}
return 1;
}
} catch (MigrationException e) {
log.error("Failed to validate {}", mappingName, e);
outputLogger.info("FAIL: Failed to validate {}: {}", e.getMessage());
return 1;
}
}

private void validateOptions(AggregateFileMappingOptions options) {
if (options.getBasePath() == null) {
throw new IllegalArgumentException("Must provide a base path or provide the --blank flag");
}
if (StringUtils.isBlank(options.getExportField())) {
throw new IllegalArgumentException("Must provide an export field");
}
}

private void initialize(boolean sortBottom) throws IOException {
Path currentPath = parentCommand.getWorkingDirectory();
project = MigrationProjectFactory.loadMigrationProject(currentPath);
indexService = new CdmIndexService();
indexService.setProject(project);
aggregateService = new AggregateFileMappingService(sortBottom);
aggregateService.setIndexService(indexService);
aggregateService.setProject(project);
}
}
3 changes: 2 additions & 1 deletion src/main/java/edu/unc/lib/boxc/migration/cdm/CLIMain.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
ProjectPropertiesCommand.class,
VerifyPostMigrationCommand.class,
MigrationTypeReportCommand.class,
FilterIndexCommand.class
FilterIndexCommand.class,
AggregateFilesCommand.class
})
public class CLIMain implements Callable<Integer> {
@Option(names = { "-w", "--work-dir" },
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/edu/unc/lib/boxc/migration/cdm/SipsCommand.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.nio.file.Path;
import java.util.List;

import edu.unc.lib.boxc.migration.cdm.services.AggregateFileMappingService;
import org.slf4j.Logger;

import edu.unc.lib.boxc.migration.cdm.exceptions.MigrationException;
Expand Down Expand Up @@ -43,6 +44,8 @@ public class SipsCommand {
private DescriptionsService descriptionsService;
private DestinationsService destinationsService;
private CdmIndexService indexService;
private AggregateFileMappingService aggregateTopMappingService;
private AggregateFileMappingService aggregateBottomMappingService;
private PIDMinter pidMinter;
private PremisLoggerFactoryImpl premisLoggerFactory;
private SipService sipService;
Expand Down Expand Up @@ -126,6 +129,12 @@ private void initialize() throws IOException {
descriptionsService.setProject(project);
destinationsService = new DestinationsService();
destinationsService.setProject(project);
aggregateTopMappingService = new AggregateFileMappingService(false);
aggregateTopMappingService.setIndexService(indexService);
aggregateTopMappingService.setProject(project);
aggregateBottomMappingService = new AggregateFileMappingService(true);
aggregateBottomMappingService.setIndexService(indexService);
aggregateBottomMappingService.setProject(project);

sipService = new SipService();
sipService.setIndexService(indexService);
Expand All @@ -136,5 +145,7 @@ private void initialize() throws IOException {
sipService.setPremisLoggerFactory(premisLoggerFactory);
sipService.setProject(project);
sipService.setChompbConfig(parentCommand.getChompbConfig());
sipService.setAggregateTopMappingService(aggregateTopMappingService);
sipService.setAggregateBottomMappingService(aggregateBottomMappingService);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ public class MigrationProject {
public static final String DESTINATIONS_FILENAME = "destinations.csv";
public static final String SOURCE_MAPPING_FILENAME = "source_files.csv";
public static final String ACCESS_MAPPING_FILENAME = "access_files.csv";
public static final String AGGREGATE_TOP_MAPPING_FILENAME = "aggregate_top_files.csv";
public static final String AGGREGATE_BOTTOM_MAPPING_FILENAME = "aggregate_bottom_files.csv";
public static final String GROUP_MAPPING_FILENAME = "group_mappings.csv";
public static final String SIPS_DIRNAME = "sips";
public static final String REDIRECT_MAPPING_FILENAME = "redirect_mappings.csv";
Expand Down Expand Up @@ -118,6 +120,14 @@ public Path getAccessFilesMappingPath() {
return projectPath.resolve(ACCESS_MAPPING_FILENAME);
}

/**
 * @return Path of the top aggregate file mapping, which is the project path
 *         resolved against AGGREGATE_TOP_MAPPING_FILENAME
 */
public Path getAggregateTopMappingPath() {
return projectPath.resolve(AGGREGATE_TOP_MAPPING_FILENAME);
}

/**
 * @return Path of the bottom aggregate file mapping, which is the project path
 *         resolved against AGGREGATE_BOTTOM_MAPPING_FILENAME
 */
public Path getAggregateBottomMappingPath() {
return projectPath.resolve(AGGREGATE_BOTTOM_MAPPING_FILENAME);
}

/**
* @return Path of the object group mapping files mapping file
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,19 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;

/**
* Container class for accessing mappings of files to migration objects.
* This may be source file mappings, but is also used for other types of file mappings using the same structure.
*
* @author bbpennel
*/
public class SourceFilesInfo {
public static final String SEPARATOR = "|";
public static final String ESCAPED_SEPARATOR = "\\|";
public static final String POTENTIAL_MATCHES_FIELD = "potential_matches";
public static final String SOURCE_FILE_FIELD = "source_file";
public static final String EXPORT_MATCHING_FIELD = "matching_value";
Expand Down Expand Up @@ -41,13 +47,16 @@ public void setMappings(List<SourceFileMapping> mappings) {
* @return mapping with matching cdm id, or null if no match
*/
public SourceFileMapping getMappingByCdmId(String cdmId) {
return this.mappings.stream().filter(m -> m.getCdmId().equals(cdmId)).findFirst().orElseGet(null);
return this.mappings.stream().filter(m -> m.getCdmId().equals(cdmId)).findFirst().orElse(null);
}

/**
* An individual mapping from a migration object to associated files.
*/
public static class SourceFileMapping {
private String cdmId;
private String matchingValue;
private Path sourcePath;
private List<Path> sourcePaths;
private List<String> potentialMatches;

public String getCdmId() {
Expand All @@ -66,15 +75,33 @@ public void setMatchingValue(String matchingValue) {
this.matchingValue = matchingValue;
}

public Path getSourcePath() {
return sourcePath;
public List<Path> getSourcePaths() {
return sourcePaths;
}

/**
 * @return the first source path in this mapping, or null if no source paths
 *         are set (the list is either null or empty)
 */
public Path getFirstSourcePath() {
    // An empty list is treated the same as null to avoid an IndexOutOfBoundsException
    return (sourcePaths == null || sourcePaths.isEmpty()) ? null : sourcePaths.get(0);
}

/**
 * Replaces the source paths of this mapping with the provided list.
 * The list reference is stored as-is, without copying.
 *
 * @param sourcePaths new list of source paths
 */
public void setSourcePaths(List<Path> sourcePaths) {
this.sourcePaths = sourcePaths;
}

public void setSourcePath(String sourcePath) {
if (StringUtils.isBlank(sourcePath)) {
this.sourcePath = null;
public void setSourcePaths(String sourcePaths) {
if (StringUtils.isBlank(sourcePaths)) {
this.sourcePaths = null;
} else {
this.sourcePaths = Arrays.stream(sourcePaths.split(ESCAPED_SEPARATOR))
.map(Paths::get)
.collect(Collectors.toList());
}
}

public String getSourcePathString() {
if (sourcePaths == null) {
return null;
} else {
this.sourcePath = Paths.get(sourcePath);
return sourcePaths.stream().map(Object::toString).collect(Collectors.joining(SEPARATOR));
}
}

Expand All @@ -86,7 +113,7 @@ public void setPotentialMatches(String potentialMatches) {
if (StringUtils.isBlank(potentialMatches)) {
this.potentialMatches = null;
} else {
this.potentialMatches = Arrays.asList(potentialMatches.split(","));
this.potentialMatches = Arrays.asList(potentialMatches.split(ESCAPED_SEPARATOR));
}
}

Expand All @@ -98,7 +125,7 @@ public String getPotentialMatchesString() {
if (potentialMatches == null) {
return null;
} else {
return String.join(",", potentialMatches);
return String.join(SEPARATOR, potentialMatches);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package edu.unc.lib.boxc.migration.cdm.options;

import picocli.CommandLine;

/**
 * Command line options used when mapping aggregate files. Adds a flag to the
 * inherited source file mapping options that selects whether mapped files
 * sort after or before the regular files of a work.
 *
 * @author bbpennel
 */
public class AggregateFileMappingOptions extends SourceFileMappingOptions {
    @CommandLine.Option(
            names = { "--sort-bottom" },
            description = {
                "If specified, aggregate files mapped will be sorted after regular files in the work.",
                "If not, then mapped files will be sorted before regular files in the work."
            })
    private boolean sortBottom;

    /**
     * @return true if the --sort-bottom flag is set
     */
    public boolean isSortBottom() {
        return this.sortBottom;
    }

    /**
     * @param sortBottom new value for the --sort-bottom flag
     */
    public void setSortBottom(boolean sortBottom) {
        this.sortBottom = sortBottom;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package edu.unc.lib.boxc.migration.cdm.services;

import edu.unc.lib.boxc.migration.cdm.model.CdmFieldInfo;
import edu.unc.lib.boxc.migration.cdm.model.SourceFilesInfo;
import edu.unc.lib.boxc.migration.cdm.options.SourceFileMappingOptions;

import java.nio.file.Path;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static edu.unc.lib.boxc.migration.cdm.services.CdmIndexService.ENTRY_TYPE_FIELD;

/**
 * Service which allows mapping of aggregate files like PDFs or TXTs to multi-file works.
 * An instance operates on either the top or the bottom aggregate mapping of a project,
 * selected by the sortBottom flag.
 *
 * @author bbpennel
 */
public class AggregateFileMappingService extends SourceFileService {
    private boolean sortBottom;

    /**
     * @param sortBottom if true this service uses the bottom aggregate mapping,
     *        otherwise the top aggregate mapping
     */
    public AggregateFileMappingService(boolean sortBottom) {
        this.sortBottom = sortBottom;
    }

    /**
     * @return path of the bottom aggregate mapping when sortBottom is set, otherwise the top mapping
     */
    @Override
    protected Path getMappingPath() {
        if (sortBottom) {
            return project.getAggregateBottomMappingPath();
        } else {
            return project.getAggregateTopMappingPath();
        }
    }

    /**
     * Builds the query selecting the records that aggregate files may be mapped to.
     * Query for grouped works or compound objects (no children or single file works).
     *
     * @param options mapping options; the export field is selected unless populating a blank mapping
     * @return select statement as a string
     */
    @Override
    protected String buildQuery(SourceFileMappingOptions options) {
        // NOTE(review): the export field name is concatenated directly into the statement;
        // confirm it is validated against the known CDM fields before reaching this point.
        String selectStatement;
        if (options.isPopulateBlank()) {
            selectStatement = "select " + CdmFieldInfo.CDM_ID;
        } else {
            selectStatement = "select " + CdmFieldInfo.CDM_ID + ", " + options.getExportField();
        }
        return selectStatement
                + " from " + CdmIndexService.TB_NAME
                + " where " + ENTRY_TYPE_FIELD + " = '" + CdmIndexService.ENTRY_TYPE_COMPOUND_OBJECT + "'"
                + " or " + ENTRY_TYPE_FIELD + " = '" + CdmIndexService.ENTRY_TYPE_GROUPED_WORK + "'";
    }

    /**
     * Resolves a conflict between an existing mapping and an updated mapping for the same object.
     * When forcing, or when the existing mapping has no source paths, the update wins outright.
     * Otherwise the update mapping receives the existing paths followed by its own new paths,
     * with duplicates removed.
     *
     * @param options mapping options, consulted for the force flag
     * @param origMapping existing mapping
     * @param updateMapping newly generated mapping; its source paths may be replaced
     * @return the update mapping, holding the resolved source paths
     */
    @Override
    protected SourceFilesInfo.SourceFileMapping resolveSourcePathConflict(SourceFileMappingOptions options,
            SourceFilesInfo.SourceFileMapping origMapping,
            SourceFilesInfo.SourceFileMapping updateMapping) {
        if (options.isForce() || origMapping.getSourcePaths() == null) {
            return updateMapping;
        }
        // A mapping with no source paths holds null rather than an empty list, so treat
        // null as "nothing new to add" instead of failing with a NullPointerException
        List<Path> updatePaths = updateMapping.getSourcePaths();
        Stream<Path> newPaths = (updatePaths == null) ? Stream.empty() : updatePaths.stream();
        // Combine the old and new values, removing any duplicates
        List<Path> combined = Stream.concat(origMapping.getSourcePaths().stream(), newPaths)
                .distinct()
                .collect(Collectors.toList());
        updateMapping.setSourcePaths(combined);
        return updateMapping;
    }

    /**
     * @param sortBottom if true this service uses the bottom aggregate mapping,
     *        otherwise the top aggregate mapping
     */
    public void setSortBottom(boolean sortBottom) {
        this.sortBottom = sortBottom;
    }
}
Loading

0 comments on commit 18a4314

Please sign in to comment.