-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CDAP-15855: Updated validation API. #7
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,9 +23,12 @@ | |
import io.cdap.cdap.api.annotation.Plugin; | ||
import io.cdap.cdap.api.common.Bytes; | ||
import io.cdap.cdap.api.plugin.PluginConfig; | ||
import io.cdap.cdap.etl.api.FailureCollector; | ||
import io.cdap.cdap.etl.api.PipelineConfigurer; | ||
import io.cdap.cdap.etl.api.StageConfigurer; | ||
import io.cdap.cdap.etl.api.action.Action; | ||
import io.cdap.cdap.etl.api.action.ActionContext; | ||
import io.cdap.cdap.etl.api.validation.ValidationException; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.FileStatus; | ||
import org.apache.hadoop.fs.FileSystem; | ||
|
@@ -63,14 +66,15 @@ public ToUTF8Action(ToUTF8Config config) { | |
} | ||
|
||
@Override | ||
public void configurePipeline(PipelineConfigurer pipelineConfigurer) throws IllegalArgumentException { | ||
public void configurePipeline(PipelineConfigurer pipelineConfigurer) { | ||
super.configurePipeline(pipelineConfigurer); | ||
config.validate(); | ||
StageConfigurer stageConfigurer = pipelineConfigurer.getStageConfigurer(); | ||
config.validate(stageConfigurer.getFailureCollector()); | ||
} | ||
|
||
@Override | ||
public void run(ActionContext context) throws Exception { | ||
config.validate(); | ||
config.validate(context.getFailureCollector()); | ||
Path source = new Path(config.sourceFilePath); | ||
Path dest = new Path(config.destFilePath); | ||
|
||
|
@@ -83,7 +87,7 @@ public void run(ActionContext context) throws Exception { | |
} else { | ||
// Convert all the files in a directory | ||
PathFilter filter = new PathFilter() { | ||
private final Pattern pattern = Pattern.compile(config.fileRegex); | ||
private final Pattern pattern = Pattern.compile(config.getFileRegex()); | ||
|
||
@Override | ||
public boolean accept(Path path) { | ||
|
@@ -98,7 +102,7 @@ public boolean accept(Path path) { | |
|
||
if (listFiles.length == 0) { | ||
LOG.warn("Not converting any files from source {} matching regular expression", | ||
source.toString(), config.fileRegex); | ||
source.toString(), config.getFileRegex()); | ||
} | ||
|
||
if (fileSystem.exists(dest) && fileSystem.isFile(dest)) { | ||
|
@@ -133,7 +137,7 @@ private void convertSingleFile(Path source, Path dest, FileSystem fileSystem) th | |
out.flush(); | ||
out.close(); | ||
} catch (IOException e) { | ||
if (!config.continueOnError) { | ||
if (!config.getContinueOnError()) { | ||
throw new IOException(String.format("Failed to convert file %s to %s", source.toString(), dest.toString())); | ||
} | ||
LOG.warn(String.format("Exception convert file %s to %s", source.toString(), dest.toString()), e); | ||
|
@@ -144,18 +148,28 @@ private void convertSingleFile(Path source, Path dest, FileSystem fileSystem) th | |
* Config class that contains all properties required for running the unload command. | ||
*/ | ||
public static class ToUTF8Config extends PluginConfig { | ||
|
||
public static final String SOURCE_FILE_PATH = "sourceFilePath"; | ||
public static final String DEST_FILE_PATH = "destFilePath"; | ||
public static final String FILE_REGEX = "fileRegex"; | ||
public static final String CHARSET = "charset"; | ||
|
||
@Name(SOURCE_FILE_PATH) | ||
@Macro | ||
@Description("The source location where the file or files live. You can use glob syntax here such as *.dat.") | ||
private String sourceFilePath; | ||
|
||
@Name(DEST_FILE_PATH) | ||
@Macro | ||
@Description("The destination location where the converted files should be.") | ||
private String destFilePath; | ||
|
||
@Name(CHARSET) | ||
@Macro | ||
@Description("The source charset.") | ||
private String charset; | ||
|
||
@Name(FILE_REGEX) | ||
@Macro | ||
@Nullable | ||
@Description("A regular expression for filtering files such as .*\\.txt") | ||
|
@@ -166,6 +180,13 @@ public static class ToUTF8Config extends PluginConfig { | |
@Description("Set to true if this plugin should ignore errors.") | ||
private Boolean continueOnError; | ||
|
||
public String getFileRegex() { | ||
return (Strings.isNullOrEmpty(fileRegex)) ? ".*" : fileRegex; | ||
} | ||
|
||
public boolean getContinueOnError() { | ||
return continueOnError == null ? false : continueOnError; | ||
} | ||
|
||
public ToUTF8Config(String sourceFilePath, String destFilePath, @Nullable String fileRegex, | ||
String charset, @Nullable Boolean continueOnError) { | ||
|
@@ -179,31 +200,38 @@ public ToUTF8Config(String sourceFilePath, String destFilePath, @Nullable String | |
/** | ||
* Validates the config parameters required for unloading the data. | ||
*/ | ||
private void validate() throws IllegalArgumentException { | ||
try { | ||
Charset.forName(charset); | ||
} catch (UnsupportedCharsetException e) { | ||
throw new IllegalArgumentException("The charset entered is not valid. Please use a value " + | ||
"from https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html.", e); | ||
private void validate(FailureCollector collector) throws ValidationException { | ||
if (Strings.isNullOrEmpty(sourceFilePath)) { | ||
collector.addFailure("Source file or folder is required.", "Set source file or folder.") | ||
.withConfigProperty(SOURCE_FILE_PATH); | ||
collector.getOrThrowException(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we need to throw exception here? This would just validate one property and throw an exception. we should validate other properties as well before throwing an exception |
||
} | ||
try { | ||
Pattern.compile(fileRegex); | ||
} catch (Exception e) { | ||
throw new IllegalArgumentException("The regular expression pattern provided is not a valid " + | ||
"regular expression.", e); | ||
} | ||
if (Strings.isNullOrEmpty(sourceFilePath)) { | ||
throw new IllegalArgumentException("Source file or folder is required."); | ||
Path source = new Path(sourceFilePath); | ||
source.getFileSystem(new Configuration()); | ||
} catch (IOException e) { | ||
collector.addFailure("Cannot determine the file system of the source file.", null) | ||
.withConfigProperty(SOURCE_FILE_PATH).withStacktrace(e.getStackTrace()); | ||
} | ||
if (Strings.isNullOrEmpty(destFilePath)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this check can be moved up to be validated along with source path |
||
throw new IllegalArgumentException("Destination file or folder is required."); | ||
collector.addFailure("Destination file or folder is required.", | ||
"Set destination file or folder.").withConfigProperty(DEST_FILE_PATH); | ||
} | ||
try { | ||
Path source = new Path(sourceFilePath); | ||
FileSystem fileSystem = source.getFileSystem(new Configuration()); | ||
} catch (IOException e) { | ||
throw new IllegalArgumentException("Cannot determine the file system of the source file.", e); | ||
Pattern.compile(getFileRegex()); | ||
} catch (Exception e) { | ||
collector.addFailure("The regular expression pattern provided is not a valid " + | ||
"regular expression.", "Set correct regular expression pattern.") | ||
.withConfigProperty(FILE_REGEX); | ||
} | ||
try { | ||
Charset.forName(charset); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this can be validated along with source path. |
||
} catch (UnsupportedCharsetException e) { | ||
collector.addFailure(e.getMessage(), "The charset entered is not valid. Please use a value " + | ||
"from https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html.") | ||
.withConfigProperty(CHARSET); | ||
} | ||
collector.getOrThrowException(); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,10 @@ | |
|
||
package io.cdap.plugin; | ||
|
||
import io.cdap.cdap.etl.api.validation.CauseAttributes; | ||
import io.cdap.cdap.etl.api.validation.ValidationException; | ||
import io.cdap.cdap.etl.api.validation.ValidationFailure; | ||
import io.cdap.cdap.etl.mock.action.MockActionContext; | ||
import io.cdap.cdap.etl.mock.common.MockPipelineConfigurer; | ||
import org.junit.ClassRule; | ||
import org.junit.Test; | ||
|
@@ -34,6 +38,8 @@ | |
public class ToUTF8ConfigTest { | ||
private static final String ISO_8859_FILE_NAME = "20150320_clo_prod_cln.dat"; | ||
private static final String UTF_8_FILE_NAME = "20150320_clo_prod_cln.dat.utf8"; | ||
private static final String MOCK_NAME = "mockstage"; | ||
private static final String STAGE = "stage"; | ||
|
||
private FileFilter filter = new FileFilter() { | ||
private final Pattern pattern = Pattern.compile("[^\\.].*\\.utf8"); | ||
|
@@ -57,7 +63,8 @@ public void testSingleFile() throws Exception { | |
null, "ISO-8859-1", false); | ||
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null); | ||
new ToUTF8Action(config).configurePipeline(configurer); | ||
new ToUTF8Action(config).run(null); | ||
MockActionContext context = new MockActionContext(); | ||
new ToUTF8Action(config).run(context); | ||
assertEquals(1, destFolder.listFiles(filter).length); | ||
assertEquals(UTF_8_FILE_NAME, destFolder.listFiles(filter)[0].getName()); | ||
} | ||
|
@@ -73,7 +80,8 @@ public void testFolder() throws Exception { | |
null, "ISO-8859-1", false); | ||
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null); | ||
new ToUTF8Action(config).configurePipeline(configurer); | ||
new ToUTF8Action(config).run(null); | ||
MockActionContext context = new MockActionContext(); | ||
new ToUTF8Action(config).run(context); | ||
assertEquals(1, destFolder.listFiles(filter).length); | ||
assertEquals(UTF_8_FILE_NAME, destFolder.listFiles(filter)[0].getName()); | ||
} | ||
|
@@ -90,7 +98,8 @@ public void testFolderWithGlob() throws Exception { | |
null, "ISO-8859-1", false); | ||
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null); | ||
new ToUTF8Action(config).configurePipeline(configurer); | ||
new ToUTF8Action(config).run(null); | ||
MockActionContext context = new MockActionContext(); | ||
new ToUTF8Action(config).run(context); | ||
assertEquals(1, destFolder.listFiles(filter).length); | ||
assertEquals(UTF_8_FILE_NAME, destFolder.listFiles(filter)[0].getName()); | ||
} | ||
|
@@ -106,8 +115,82 @@ public void testFolderWithRegEx() throws Exception { | |
".*\\.dat", "ISO-8859-1", false); | ||
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null); | ||
new ToUTF8Action(config).configurePipeline(configurer); | ||
new ToUTF8Action(config).run(null); | ||
MockActionContext context = new MockActionContext(); | ||
new ToUTF8Action(config).run(context); | ||
assertEquals(1, destFolder.listFiles(filter).length); | ||
assertEquals(UTF_8_FILE_NAME, destFolder.listFiles(filter)[0].getName()); | ||
} | ||
|
||
@Test | ||
public void testValidationEmptySourcePath() { | ||
ToUTF8Action.ToUTF8Config config = new ToUTF8Action.ToUTF8Config("", "", | ||
".*\\.dat", "ISO-8859-1", false); | ||
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null); | ||
try { | ||
new ToUTF8Action(config).configurePipeline(configurer); | ||
} catch (ValidationException e) { | ||
assertEquals(1, e.getFailures().size()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we also add unit tests where more than 1 failures are collected? |
||
ValidationFailure validationFailure = e.getFailures().get(0); | ||
assertEquals("Source file or folder is required.", validationFailure.getMessage()); | ||
assertEquals(1, validationFailure.getCauses().size()); | ||
ValidationFailure.Cause cause = validationFailure.getCauses().get(0); | ||
assertEquals(ToUTF8Action.ToUTF8Config.SOURCE_FILE_PATH, cause.getAttribute(CauseAttributes.STAGE_CONFIG)); | ||
assertEquals(MOCK_NAME, cause.getAttribute(STAGE)); | ||
} | ||
} | ||
|
||
@Test | ||
public void testValidationEmptyDestinationPath() { | ||
ToUTF8Action.ToUTF8Config config = new ToUTF8Action.ToUTF8Config("_12_test", "", | ||
".*\\.dat", "ISO-8859-1", false); | ||
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null); | ||
try { | ||
new ToUTF8Action(config).configurePipeline(configurer); | ||
} catch (ValidationException e) { | ||
assertEquals(1, e.getFailures().size()); | ||
ValidationFailure validationFailure = e.getFailures().get(0); | ||
assertEquals("Destination file or folder is required.", validationFailure.getMessage()); | ||
assertEquals(1, validationFailure.getCauses().size()); | ||
ValidationFailure.Cause cause = validationFailure.getCauses().get(0); | ||
assertEquals(ToUTF8Action.ToUTF8Config.DEST_FILE_PATH, cause.getAttribute(CauseAttributes.STAGE_CONFIG)); | ||
assertEquals(MOCK_NAME, cause.getAttribute(STAGE)); | ||
} | ||
} | ||
|
||
@Test | ||
public void testValidationFileRegularExpression() { | ||
ToUTF8Action.ToUTF8Config config = new ToUTF8Action.ToUTF8Config("source_path", "dest_path", | ||
"*\\.dat", "ISO-8859-1", false); | ||
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null); | ||
try { | ||
new ToUTF8Action(config).configurePipeline(configurer); | ||
} catch (ValidationException e) { | ||
assertEquals(1, e.getFailures().size()); | ||
ValidationFailure validationFailure = e.getFailures().get(0); | ||
assertEquals("The regular expression pattern provided is not a valid regular expression.", | ||
validationFailure.getMessage()); | ||
assertEquals(1, validationFailure.getCauses().size()); | ||
ValidationFailure.Cause cause = validationFailure.getCauses().get(0); | ||
assertEquals(ToUTF8Action.ToUTF8Config.FILE_REGEX, cause.getAttribute(CauseAttributes.STAGE_CONFIG)); | ||
assertEquals(MOCK_NAME, cause.getAttribute(STAGE)); | ||
} | ||
} | ||
|
||
@Test | ||
public void testValidationCharacterSet() { | ||
ToUTF8Action.ToUTF8Config config = new ToUTF8Action.ToUTF8Config("source_path", "dest_path", | ||
".*\\.dat", "ISO-885-1", false); | ||
MockPipelineConfigurer configurer = new MockPipelineConfigurer(null); | ||
try { | ||
new ToUTF8Action(config).configurePipeline(configurer); | ||
} catch (ValidationException e) { | ||
assertEquals(1, e.getFailures().size()); | ||
ValidationFailure validationFailure = e.getFailures().get(0); | ||
assertEquals("ISO-885-1", validationFailure.getMessage()); | ||
assertEquals(1, validationFailure.getCauses().size()); | ||
ValidationFailure.Cause cause = validationFailure.getCauses().get(0); | ||
assertEquals(ToUTF8Action.ToUTF8Config.CHARSET, cause.getAttribute(CauseAttributes.STAGE_CONFIG)); | ||
assertEquals(MOCK_NAME, cause.getAttribute(STAGE)); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the error message and the corrective action are basically the same (like here), it's better to just have the message and leave the corrective action null.