-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #65 from UNC-Libraries/bxc-4130-aggregate
BXC-4130 - Add aggregate mappings
- Loading branch information
Showing
24 changed files
with
1,223 additions
and
91 deletions.
There are no files selected for viewing
137 changes: 137 additions & 0 deletions
137
src/main/java/edu/unc/lib/boxc/migration/cdm/AggregateFilesCommand.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
package edu.unc.lib.boxc.migration.cdm; | ||
|
||
import edu.unc.lib.boxc.migration.cdm.exceptions.MigrationException; | ||
import edu.unc.lib.boxc.migration.cdm.model.MigrationProject; | ||
import edu.unc.lib.boxc.migration.cdm.options.AggregateFileMappingOptions; | ||
import edu.unc.lib.boxc.migration.cdm.options.Verbosity; | ||
import edu.unc.lib.boxc.migration.cdm.services.AggregateFileMappingService; | ||
import edu.unc.lib.boxc.migration.cdm.services.CdmIndexService; | ||
import edu.unc.lib.boxc.migration.cdm.services.MigrationProjectFactory; | ||
import edu.unc.lib.boxc.migration.cdm.validators.AggregateFilesValidator; | ||
import edu.unc.lib.boxc.migration.cdm.validators.SourceFilesValidator; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.slf4j.Logger; | ||
import picocli.CommandLine; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Path; | ||
import java.util.List; | ||
|
||
import static edu.unc.lib.boxc.migration.cdm.model.MigrationProject.AGGREGATE_BOTTOM_MAPPING_FILENAME; | ||
import static edu.unc.lib.boxc.migration.cdm.model.MigrationProject.AGGREGATE_TOP_MAPPING_FILENAME; | ||
import static edu.unc.lib.boxc.migration.cdm.util.CLIConstants.outputLogger; | ||
import static org.slf4j.LoggerFactory.getLogger; | ||
|
||
/** | ||
* @author bbpennel | ||
*/ | ||
@CommandLine.Command(name = "aggregate_files", | ||
description = "Commands related to aggregate file mappings") | ||
public class AggregateFilesCommand { | ||
private static final Logger log = getLogger(AggregateFilesCommand.class); | ||
|
||
@CommandLine.ParentCommand | ||
private CLIMain parentCommand; | ||
|
||
private MigrationProject project; | ||
private AggregateFileMappingService aggregateService; | ||
private CdmIndexService indexService; | ||
|
||
@CommandLine.Command(name = "generate", | ||
description = { | ||
"Generate an aggregate file mapping for this project, mapping compound or grouped works to files.", | ||
"By default will produce a mapping for files to add to the top of a work.", | ||
"To add aggregate files to the bottom of works, use the --sort-bottom flag.", | ||
"Individual projects can include both top and bottom mappings. Respectively, they are stored to " | ||
+ AGGREGATE_TOP_MAPPING_FILENAME + " and " + AGGREGATE_BOTTOM_MAPPING_FILENAME + ".", | ||
"If multiple files are mapped to the same object across separate runs, they will be sorted in " | ||
+ "order added, either at the top or bottom of the work. " | ||
+ "So, earlier added sorts before later within each section.", | ||
"Mappings are produced by listing files from a directory using the --base-path option, " | ||
+ "then searching for matches between those filenames and some filename field in the " | ||
+ "exported CDM records.", | ||
"The filename field is set using the --field-name option.", | ||
"If the value of the filename field does not match the name of the source file, the filename " | ||
+ " can be transformed using regular expressions via the --field-pattern" | ||
+ " and --field-pattern options.", | ||
"The resulting will be written to the source_files.csv for this project, unless " | ||
+ "the --dry-run flag is provided."}) | ||
public int generate(@CommandLine.Mixin AggregateFileMappingOptions options) throws Exception { | ||
long start = System.nanoTime(); | ||
|
||
try { | ||
validateOptions(options); | ||
initialize(options.isSortBottom()); | ||
|
||
aggregateService.generateMapping(options); | ||
outputLogger.info("Aggregate file mapping generated for {} in {}s", project.getProjectName(), | ||
(System.nanoTime() - start) / 1e9); | ||
return 0; | ||
} catch (MigrationException | IllegalArgumentException e) { | ||
outputLogger.info("Cannot generate aggregate mapping: {}", e.getMessage()); | ||
return 1; | ||
} catch (Exception e) { | ||
log.error("Failed to map aggregate files", e); | ||
outputLogger.info("Failed to map aggregate files: {}", e.getMessage(), e); | ||
return 1; | ||
} | ||
} | ||
|
||
@CommandLine.Command(name = "validate", | ||
description = "Validate a aggregate file mappings for this project. Defaults to top mapping.") | ||
public int validate(@CommandLine.Option(names = { "-f", "--force" }, | ||
description = "Ignore incomplete mappings") boolean force, | ||
@CommandLine.Option(names = { "--sort-bottom" }, | ||
description = "Validate bottom sort mapping") boolean sortBottom) throws Exception { | ||
String mappingName = (sortBottom ? "Bottom" : "Top") + " aggregate file mappings"; | ||
try { | ||
initialize(sortBottom); | ||
var validator = new AggregateFilesValidator(sortBottom); | ||
validator.setProject(project); | ||
List<String> errors = validator.validateMappings(force); | ||
|
||
var mappingPath = sortBottom ? project.getAggregateBottomMappingPath() | ||
: project.getAggregateTopMappingPath(); | ||
if (errors.isEmpty()) { | ||
outputLogger.info("PASS: {} at path {} is valid", | ||
mappingName, mappingPath); | ||
return 0; | ||
} else { | ||
if (parentCommand.getVerbosity().equals(Verbosity.QUIET)) { | ||
outputLogger.info("FAIL: {} is invalid with {} errors", | ||
mappingName, errors.size()); | ||
} else { | ||
outputLogger.info("FAIL: {} at path {} is invalid due to the following issues:", | ||
mappingName, mappingPath); | ||
for (String error : errors) { | ||
outputLogger.info(" - " + error); | ||
} | ||
} | ||
return 1; | ||
} | ||
} catch (MigrationException e) { | ||
log.error("Failed to validate {}", mappingName, e); | ||
outputLogger.info("FAIL: Failed to validate {}: {}", e.getMessage()); | ||
return 1; | ||
} | ||
} | ||
|
||
private void validateOptions(AggregateFileMappingOptions options) { | ||
if (options.getBasePath() == null) { | ||
throw new IllegalArgumentException("Must provide a base path or provide the --blank flag"); | ||
} | ||
if (StringUtils.isBlank(options.getExportField())) { | ||
throw new IllegalArgumentException("Must provide an export field"); | ||
} | ||
} | ||
|
||
private void initialize(boolean sortBottom) throws IOException { | ||
Path currentPath = parentCommand.getWorkingDirectory(); | ||
project = MigrationProjectFactory.loadMigrationProject(currentPath); | ||
indexService = new CdmIndexService(); | ||
indexService.setProject(project); | ||
aggregateService = new AggregateFileMappingService(sortBottom); | ||
aggregateService.setIndexService(indexService); | ||
aggregateService.setProject(project); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
22 changes: 22 additions & 0 deletions
22
src/main/java/edu/unc/lib/boxc/migration/cdm/options/AggregateFileMappingOptions.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
package edu.unc.lib.boxc.migration.cdm.options; | ||
|
||
import picocli.CommandLine; | ||
|
||
/** | ||
* Options for aggregate file mapping | ||
* @author bbpennel | ||
*/ | ||
public class AggregateFileMappingOptions extends SourceFileMappingOptions { | ||
@CommandLine.Option(names = { "--sort-bottom" }, | ||
description = { "If specified, aggregate files mapped will be sorted after regular files in the work.", | ||
"If not, then mapped files will be sorted before regular files in the work." } ) | ||
private boolean sortBottom; | ||
|
||
public boolean isSortBottom() { | ||
return sortBottom; | ||
} | ||
|
||
public void setSortBottom(boolean sortBottom) { | ||
this.sortBottom = sortBottom; | ||
} | ||
} |
68 changes: 68 additions & 0 deletions
68
src/main/java/edu/unc/lib/boxc/migration/cdm/services/AggregateFileMappingService.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
package edu.unc.lib.boxc.migration.cdm.services; | ||
|
||
import edu.unc.lib.boxc.migration.cdm.model.CdmFieldInfo; | ||
import edu.unc.lib.boxc.migration.cdm.model.SourceFilesInfo; | ||
import edu.unc.lib.boxc.migration.cdm.options.SourceFileMappingOptions; | ||
|
||
import java.nio.file.Path; | ||
import java.util.List; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
|
||
import static edu.unc.lib.boxc.migration.cdm.services.CdmIndexService.ENTRY_TYPE_FIELD; | ||
|
||
/** | ||
* Service which allows mapping of aggregate files like PDFs or TXTs to multi-file works | ||
* @author bbpennel | ||
*/ | ||
public class AggregateFileMappingService extends SourceFileService { | ||
private boolean sortBottom; | ||
|
||
public AggregateFileMappingService(boolean sortBottom) { | ||
this.sortBottom = sortBottom; | ||
} | ||
|
||
@Override | ||
protected Path getMappingPath() { | ||
if (sortBottom) { | ||
return project.getAggregateBottomMappingPath(); | ||
} else { | ||
return project.getAggregateTopMappingPath(); | ||
} | ||
} | ||
|
||
// Query for grouped works or compound objects (no children or single file works) | ||
@Override | ||
protected String buildQuery(SourceFileMappingOptions options) { | ||
String selectStatement; | ||
if (options.isPopulateBlank()) { | ||
selectStatement = "select " + CdmFieldInfo.CDM_ID; | ||
} else { | ||
selectStatement = "select " + CdmFieldInfo.CDM_ID + ", " + options.getExportField(); | ||
} | ||
return selectStatement | ||
+ " from " + CdmIndexService.TB_NAME | ||
+ " where " + ENTRY_TYPE_FIELD + " = '" + CdmIndexService.ENTRY_TYPE_COMPOUND_OBJECT + "'" | ||
+ " or " + ENTRY_TYPE_FIELD + " = '" + CdmIndexService.ENTRY_TYPE_GROUPED_WORK + "'"; | ||
} | ||
|
||
@Override | ||
protected SourceFilesInfo.SourceFileMapping resolveSourcePathConflict(SourceFileMappingOptions options, | ||
SourceFilesInfo.SourceFileMapping origMapping, | ||
SourceFilesInfo.SourceFileMapping updateMapping) { | ||
if (options.isForce() || origMapping.getSourcePaths() == null) { | ||
return updateMapping; | ||
} | ||
// Combine the old and new values, removing any duplicates | ||
List<Path> combined = Stream.concat(origMapping.getSourcePaths().stream(), | ||
updateMapping.getSourcePaths().stream()) | ||
.distinct() | ||
.collect(Collectors.toList()); | ||
updateMapping.setSourcePaths(combined); | ||
return updateMapping; | ||
} | ||
|
||
public void setSortBottom(boolean sortBottom) { | ||
this.sortBottom = sortBottom; | ||
} | ||
} |
Oops, something went wrong.