Skip to content

Commit

Permalink
BXC-4636 source_files add command (#102)
Browse files Browse the repository at this point in the history
* source_files add command and tests

* test resources

* remove ensureMappingState and fix tests

* fix tests?

* fix gatherFilesystemCandidatePaths and tests

* make new options classes, add case insensitive check for extensions, rename optionalIdPrefix and addToMapping, parseInt base 16, fix/add tests

* add javadoc, move update option to GenerateSourceFileMappingOptions, populate source_file with absolute paths

* skip relativize

* lastId = 0 if empty csv, fix generateFileId if statement, add empty csv test

* add split to option, return 0 if parsing error, add test
  • Loading branch information
krwong committed Jul 30, 2024
1 parent e3616e2 commit 1932f99
Show file tree
Hide file tree
Showing 17 changed files with 658 additions and 178 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.nio.file.Path;
import java.util.List;

import edu.unc.lib.boxc.migration.cdm.options.GenerateSourceFileMappingOptions;
import edu.unc.lib.boxc.migration.cdm.status.SourceFilesSummaryService;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
Expand Down Expand Up @@ -44,7 +45,7 @@ public class AccessFilesCommand {
description = {
"Generate the optional access copy mapping file for this project.",
"See the source_files command for more details about usage"})
public int generate(@Mixin SourceFileMappingOptions options) throws Exception {
public int generate(@Mixin GenerateSourceFileMappingOptions options) throws Exception {
long start = System.nanoTime();

try {
Expand Down Expand Up @@ -119,7 +120,7 @@ public int status() throws Exception {
}
}

private void validateOptions(SourceFileMappingOptions options) {
private void validateOptions(GenerateSourceFileMappingOptions options) {
if (options.getBasePath() == null) {
throw new IllegalArgumentException("Must provide a base path");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
import java.nio.file.Path;
import java.util.List;

import edu.unc.lib.boxc.migration.cdm.options.AddSourceFileMappingOptions;
import edu.unc.lib.boxc.migration.cdm.options.ExportUnmappedSourceFilesOptions;
import edu.unc.lib.boxc.migration.cdm.options.GenerateSourceFileMappingOptions;
import edu.unc.lib.boxc.migration.cdm.services.CdmExportFilesService;
import edu.unc.lib.boxc.migration.cdm.services.CdmFieldService;
import edu.unc.lib.boxc.migration.cdm.services.CdmFileRetrievalService;
import edu.unc.lib.boxc.migration.cdm.services.StreamingMetadataService;
import edu.unc.lib.boxc.migration.cdm.status.SourceFilesSummaryService;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.math3.analysis.function.Add;
import org.slf4j.Logger;

import edu.unc.lib.boxc.migration.cdm.exceptions.MigrationException;
Expand Down Expand Up @@ -61,7 +64,7 @@ public class SourceFilesCommand {
+ " and --field-pattern options.",
"The resulting will be written to the source_files.csv for this project, unless "
+ "the --dry-run flag is provided."})
public int generate(@Mixin SourceFileMappingOptions options) throws Exception {
public int generate(@Mixin GenerateSourceFileMappingOptions options) throws Exception {
long start = System.nanoTime();

try {
Expand Down Expand Up @@ -138,7 +141,32 @@ public int status() throws Exception {
}
}

private void validateOptions(SourceFileMappingOptions options) {
/**
 * Command which adds files found on the filesystem to this project's source file mapping.
 * Delegates the work to {@code sourceService.addToMapping} and reports the elapsed time
 * on success.
 *
 * @param options options controlling which files are added and how their ids are produced
 * @return 0 if the mapping was added, 1 if any exception was thrown while adding
 * @throws Exception declared for the command signature; exceptions raised inside the
 *         body are caught, logged and converted into a return value of 1
 */
@Command(name = "add",
        description = {"Add files from the filesystem to the source mapping file for this project.",
                "Mappings are produced by listing files from a directory using the --base-path option.",
                "The user provides a list of file extensions field to include, extensions set using the --extensions option.",
                // The prefix option is declared as --id-prefix in AddSourceFileMappingOptions
                "The user can set an optional prefix for chompb ids using the --id-prefix option.",
                "The resulting will be written to the source_files.csv for this project, unless "
                        + "the --dry-run flag is provided."})
public int add(@Mixin AddSourceFileMappingOptions options) throws Exception {
    try {
        long start = System.nanoTime();
        initialize(options.getDryRun());
        sourceService.addToMapping(options);
        // Elapsed time is reported to the user in seconds
        outputLogger.info("Source file mapping added for {} in {}s", project.getProjectName(),
                (System.nanoTime() - start) / 1e9);
        return 0;
    } catch (MigrationException e) {
        // Failure is reported to the user only, and signalled via a non-zero return value
        outputLogger.info("Add source file mapping failed: {}", e.getMessage(), e);
        return 1;
    } catch (Exception e) {
        // Any other failure is additionally recorded in the application log
        log.error("Add failed", e);
        outputLogger.info("Add source file mapping failed: {}", e.getMessage(), e);
        return 1;
    }
}

private void validateOptions(GenerateSourceFileMappingOptions options) {
// If populating a blank mapping then other arguments not needed.
if (options.isPopulateBlank()) {
return;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package edu.unc.lib.boxc.migration.cdm.options;

import picocli.CommandLine.Option;

import java.util.List;

/**
 * Command line options used when adding filesystem files to a source file mapping.
 * Extends the shared source file mapping options with the file extensions to include
 * and an optional id prefix.
 *
 * @author krwong
 */
public class AddSourceFileMappingOptions extends SourceFileMappingOptions {
    /** File extensions to include; each comma separated value passes through LowerCaseConverter. */
    @Option(
            names = {"-e", "--extensions"},
            split = ",",
            description = "Provide list of file extensions to include in the source mapping file. Defaults to tif",
            defaultValue = "tif",
            converter = LowerCaseConverter.class)
    private List<String> extensions;

    /** Optional prefix for ids within the migration project; unset unless provided. */
    @Option(
            names = {"-p", "--id-prefix"},
            description = "Provide an optional prefix for IDs within the migration project. "
                    + "These IDs will only be used for work in chompb.")
    private String optionalIdPrefix;

    /**
     * @return the file extensions to include
     */
    public List<String> getExtensions() {
        return this.extensions;
    }

    /**
     * @param extensions the file extensions to include
     */
    public void setExtensions(List<String> extensions) {
        this.extensions = extensions;
    }

    /**
     * @return the optional id prefix
     */
    public String getOptionalIdPrefix() {
        return this.optionalIdPrefix;
    }

    /**
     * @param optionalIdPrefix the optional id prefix
     */
    public void setOptionalIdPrefix(String optionalIdPrefix) {
        this.optionalIdPrefix = optionalIdPrefix;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* Options for aggregate file mapping
* @author bbpennel
*/
public class AggregateFileMappingOptions extends SourceFileMappingOptions {
public class AggregateFileMappingOptions extends GenerateSourceFileMappingOptions {
@CommandLine.Option(names = { "--sort-bottom" },
description = { "If specified, aggregate files mapped will be sorted after regular files in the work.",
"If not, then mapped files will be sorted before regular files in the work." } )
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
package edu.unc.lib.boxc.migration.cdm.options;

import picocli.CommandLine.Option;

/**
 * Command line options used when generating a source file mapping.
 * Extends the shared source file mapping options with the settings that control how
 * export field values are matched to files, plus the blank, update and force flags.
 *
 * @author krwong
 */
public class GenerateSourceFileMappingOptions extends SourceFileMappingOptions {
    /** Optional glob, relative to the base path, restricting which files are searched. */
    @Option(names = {"-g", "--glob-pattern"},
            description = {
                "Optional pattern for adjusting which files within the base path to search.",
                "Must be relative to the base path. Follows glob pattern syntax.",
                "For example, to only match tiff files within the base path:",
                "    *.tiff",
                "To match any file within any directory at a depth of 2 subdirectories:",
                "    */*/*",
                "Or to match tiff files at any depth:",
                "    **/*.tiff"})
    private String pathPattern;

    /** Name of the CDM export field used as the input for filename matching. */
    @Option(names = {"-n", "--field-name"},
            description = {
                "Name of the CDM export field which will be transformed by the field matching pattern "
                        + "to produce the source file filename for matching purposes."},
            defaultValue = "file")
    private String exportField;

    /** Regular expression applied to the export field value; its groups feed the filename template. */
    @Option(names = {"-p", "--field-pattern"},
            description = {
                "Regular expression which will be used to extract portions of the export field value "
                        + "for use in the filename template. Use matching groups for this.",
                "Must match the entire value of the export field.",
                "For example, to extract numeric portions of the value: 276_214_E.tif",
                "You could provide the pattern: (\\d+)\\_(\\d+)_E.tif"},
            defaultValue = "(.+)")
    private String fieldMatchingPattern;

    /** Template which produces the expected source filename from the matched groups. */
    @Option(names = {"-t", "--file-template"},
            description = {
                "Template used to produce expected source file filenames.",
                "It should be used with matching groups from --field-pattern.",
                "NOTE: Use single quotes to wrap this value, or escape the $ characters as \\$.",
                "Given the field pattern above, it could be templated out to: 00276_op0214_0001_e.tif",
                "With the template: 00$1_op0$2_0001_e.tif"},
            defaultValue = "$1")
    private String filenameTemplate;

    /** Whether the templated filename is lowercased before matching. */
    @Option(names = {"-l", "--lower-template"},
            // Help text refers to the option by its declared name, --file-template
            description = "Convert the filename produced from the --file-template option to lowercase "
                    + "prior to attempting to match against source files.")
    private boolean lowercaseTemplate;

    /** Whether to populate a mapping with entries that are not mapped to any files. */
    @Option(names = { "-B", "--blank"},
            description = "Populate a blank source mapping file. Entries will be added for each object as per normal,"
                    + " but without attempting to map them to any files")
    private boolean populateBlank;

    /** Whether matches update an existing mapping file rather than creating a new one. */
    @Option(names = {"-u", "--update"},
            description = {
                "If provided, then any source file matches produced will be used to update an existing"
                        + " source file mapping file, instead of attempting to create a new one.",
                "This can be used to build up the mapping in multiple passes"})
    private boolean update;

    /** Whether an existing mapping file may be overwritten. */
    @Option(names = { "-f", "--force"},
            description = "Overwrite mapping file if one already exists")
    private boolean force;

    /**
     * @return the glob pattern limiting which files are searched
     */
    public String getPathPattern() {
        return pathPattern;
    }

    /**
     * @param pathPattern the glob pattern limiting which files are searched
     */
    public void setPathPattern(String pathPattern) {
        this.pathPattern = pathPattern;
    }

    /**
     * @return the name of the export field used for matching
     */
    public String getExportField() {
        return exportField;
    }

    /**
     * @param exportField the name of the export field used for matching
     */
    public void setExportField(String exportField) {
        this.exportField = exportField;
    }

    /**
     * @return the regular expression applied to the export field value
     */
    public String getFieldMatchingPattern() {
        return fieldMatchingPattern;
    }

    /**
     * @param fieldMatchingPattern the regular expression applied to the export field value
     */
    public void setFieldMatchingPattern(String fieldMatchingPattern) {
        this.fieldMatchingPattern = fieldMatchingPattern;
    }

    /**
     * @return the template used to produce expected source filenames
     */
    public String getFilenameTemplate() {
        return filenameTemplate;
    }

    /**
     * @param filenameTemplate the template used to produce expected source filenames
     */
    public void setFilenameTemplate(String filenameTemplate) {
        this.filenameTemplate = filenameTemplate;
    }

    /**
     * @return true if the templated filename should be lowercased before matching
     */
    public boolean isLowercaseTemplate() {
        return lowercaseTemplate;
    }

    /**
     * @param lowercaseTemplate true if the templated filename should be lowercased before matching
     */
    public void setLowercaseTemplate(boolean lowercaseTemplate) {
        this.lowercaseTemplate = lowercaseTemplate;
    }

    /**
     * @return true if an existing mapping should be updated instead of created
     */
    public boolean getUpdate() {
        return update;
    }

    /**
     * @param update true if an existing mapping should be updated instead of created
     */
    public void setUpdate(boolean update) {
        this.update = update;
    }

    /**
     * @return true if an existing mapping file may be overwritten
     */
    public boolean isForce() {
        return force;
    }

    /**
     * @param force true if an existing mapping file may be overwritten
     */
    public void setForce(boolean force) {
        this.force = force;
    }

    /**
     * @return true if a blank mapping should be populated
     */
    public boolean isPopulateBlank() {
        return populateBlank;
    }

    /**
     * @param populateBlank true if a blank mapping should be populated
     */
    public void setPopulateBlank(boolean populateBlank) {
        this.populateBlank = populateBlank;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package edu.unc.lib.boxc.migration.cdm.options;

import picocli.CommandLine.ITypeConverter;

/**
 * Picocli type converter which converts file extensions to lowercase
 * (for case insensitive check of extension).
 *
 * @author krwong
 */
public class LowerCaseConverter implements ITypeConverter<String> {
    /**
     * Lowercases the provided option value.
     *
     * @param value the raw option value
     * @return the value converted to lowercase using locale-independent rules
     * @throws Exception declared by the converter interface
     */
    @Override
    public String convert(String value) throws Exception {
        // Locale.ROOT keeps the result independent of the JVM default locale; with a
        // Turkish default locale, "TIF".toLowerCase() would produce "tıf" rather than "tif".
        return value.toLowerCase(java.util.Locale.ROOT);
    }
}
Loading

0 comments on commit 1932f99

Please sign in to comment.