From 1a86e0463a25714a1e82c21d589a7048a63f9c5a Mon Sep 17 00:00:00 2001 From: Peter Nemere Date: Tue, 26 Mar 2024 13:16:06 +1000 Subject: [PATCH 1/2] PIXL EM importer was assuming 1 subdir, not sure why, no longer --- api/dataimport/for-trigger_test.go | 18 +++++++++++++ .../internal/converters/pixlem/import.go | 26 +------------------ 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/api/dataimport/for-trigger_test.go b/api/dataimport/for-trigger_test.go index 39ce1297..784933e2 100644 --- a/api/dataimport/for-trigger_test.go +++ b/api/dataimport/for-trigger_test.go @@ -492,6 +492,24 @@ func Example_ImportForTrigger_Manual_SBU_NoAutoShare() { // |{"id":"test1234sbu","title":"test1234sbu","dataTypes":[{"dataType":"SD_XRF","count":2520}],"instrument":"SBU_BREADBOARD","instrumentConfig":"StonyBrookBreadboard","meta":{"DriveID":"0","RTT":"","SCLK":"0","SOL":"","Site":"","SiteID":"0","Target":"","TargetID":"0"},"contentCounts":{"BulkSpectra":2,"DwellSpectra":0,"MaxSpectra":2,"NormalSpectra":2520,"PseudoIntensities":0},"creatorUserId":"SBUImport"} } +/* Didnt get this working when the above was changed. Problem is this still generates the user name: SBUImport, so the + premise of the test fails because it doesn't end up with no user id at that point! +func Test_ImportForTrigger_Manual_SBU_NoAutoShare_FailForPipeline(t *testing.T) { + remoteFS, log, envName, configBucket, datasetBucket, manualBucket, db := initTest("Manual_OK2", "", "") + + trigger := `{ + "datasetID": "test1234sbu", + "jobID": "dataimport-unittest123sbu" +}` + + _, err := ImportForTrigger([]byte(trigger), envName, configBucket, datasetBucket, manualBucket, db, log, remoteFS) + + // Make sure we got the error + if !strings.HasSuffix(err.Error(), "Cannot work out groups to auto-share imported dataset with") { + t.Errorf("ImportForTrigger didnt return expected error") + } +} +*/ // Import a breadboard dataset from manual uploaded zip file func Example_importForTrigger_Manual_EM() { remoteFS, log, envName, configBucket, datasetBucket, manualBucket, db := initTest("ManualEM_OK", specialUserIds.PIXLISESystemUserId, "PIXLFMGroupId") diff --git a/api/dataimport/internal/converters/pixlem/import.go b/api/dataimport/internal/converters/pixlem/import.go index 480be3bd..7fa6c785 100644 --- a/api/dataimport/internal/converters/pixlem/import.go +++ b/api/dataimport/internal/converters/pixlem/import.go @@ -18,10 +18,6 @@ package pixlem import ( - "errors" - "os" - "path/filepath" - "github.com/pixlise/core/v4/api/dataimport/internal/converters/pixlfm" "github.com/pixlise/core/v4/api/dataimport/internal/dataConvertModels" "github.com/pixlise/core/v4/core/logger" @@ -36,31 +32,11 @@ type PIXLEM struct { } func (p PIXLEM) Import(importPath string, pseudoIntensityRangesPath string, datasetIDExpected string, log logger.ILogger) (*dataConvertModels.OutputData, string, error) { - // Find the subdir - subdir := "" - - c, _ := os.ReadDir(importPath) - for _, entry := range c { - if entry.IsDir() { - // If it's not the first one, we can't do this - if len(subdir) > 0 { - return nil, "", errors.New("Found multiple subdirs, expected one in: " + importPath) - } - subdir = entry.Name() - } - } - - if len(subdir) <= 0 { - return nil, "", errors.New("Failed to find PIXL data subdir in: " + importPath) - } - - // Form the actual path to the files - subImportPath := filepath.Join(importPath, subdir) fmImporter := pixlfm.PIXLFM{} // Override importers group and detector fmImporter.SetOverrides(protos.ScanInstrument_PIXL_EM, "PIXL-EM-E2E") // Now we can import it like normal - return fmImporter.Import(subImportPath, pseudoIntensityRangesPath, datasetIDExpected, log) + return fmImporter.Import(importPath, pseudoIntensityRangesPath, datasetIDExpected, log) } From e0398db0c0f9714efe43e77204b8375210b4c7bb Mon Sep 17 00:00:00 2001 From: Peter Nemere Date: Tue, 26 Mar 2024 14:44:20 +1000 Subject: [PATCH 2/2] Added extra logging to importer, brought back check for single dir in for EM unzipped data --- .../internal/converterSelector/selector.go | 5 ++++ .../internal/converters/pixlem/import.go | 26 ++++++++++++++++++- .../internal/converters/pixlfm/import.go | 2 ++ api/endpoints/Scan.go | 13 ++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/api/dataimport/internal/converterSelector/selector.go b/api/dataimport/internal/converterSelector/selector.go index d75a1c12..c15c6356 100644 --- a/api/dataimport/internal/converterSelector/selector.go +++ b/api/dataimport/internal/converterSelector/selector.go @@ -50,6 +50,7 @@ func SelectDataConverter(localFS fileaccess.FileAccess, remoteFS fileaccess.File } */ // Check if it's a PIXL FM style dataset + log.Infof("Checking path \"%v\" for PIXL FM structure...", importPath) pathType, err := pixlfm.DetectPIXLFMStructure(importPath) if len(pathType) > 0 && err == nil { // We know it's a PIXL FM type dataset... it'll later be determined which one @@ -75,10 +76,14 @@ func SelectDataConverter(localFS fileaccess.FileAccess, remoteFS fileaccess.File var detectorFile dataimportModel.DetectorChoice err = localFS.ReadJSON(detPath, "", &detectorFile, false) if err == nil { + log.Infof("Loaded detector.json...") + // We found it, work out based on what's in there if strings.HasSuffix(detectorFile.Detector, "-breadboard") { + log.Infof("Assuming breadboard dataset...") return jplbreadboard.MSATestData{}, nil } else if detectorFile.Detector == "pixl-em" { + log.Infof("Assuming PIXL EM dataset...") return pixlem.PIXLEM{}, nil } } else { diff --git a/api/dataimport/internal/converters/pixlem/import.go b/api/dataimport/internal/converters/pixlem/import.go index 7fa6c785..480be3bd 100644 --- a/api/dataimport/internal/converters/pixlem/import.go +++ b/api/dataimport/internal/converters/pixlem/import.go @@ -18,6 +18,10 @@ package pixlem import ( + "errors" + "os" + "path/filepath" + "github.com/pixlise/core/v4/api/dataimport/internal/converters/pixlfm" "github.com/pixlise/core/v4/api/dataimport/internal/dataConvertModels" "github.com/pixlise/core/v4/core/logger" @@ -32,11 +36,31 @@ type PIXLEM struct { } func (p PIXLEM) Import(importPath string, pseudoIntensityRangesPath string, datasetIDExpected string, log logger.ILogger) (*dataConvertModels.OutputData, string, error) { + // Find the subdir + subdir := "" + + c, _ := os.ReadDir(importPath) + for _, entry := range c { + if entry.IsDir() { + // If it's not the first one, we can't do this + if len(subdir) > 0 { + return nil, "", errors.New("Found multiple subdirs, expected one in: " + importPath) + } + subdir = entry.Name() + } + } + + if len(subdir) <= 0 { + return nil, "", errors.New("Failed to find PIXL data subdir in: " + importPath) + } + + // Form the actual path to the files + subImportPath := filepath.Join(importPath, subdir) fmImporter := pixlfm.PIXLFM{} // Override importers group and detector fmImporter.SetOverrides(protos.ScanInstrument_PIXL_EM, "PIXL-EM-E2E") // Now we can import it like normal - return fmImporter.Import(importPath, pseudoIntensityRangesPath, datasetIDExpected, log) + return fmImporter.Import(subImportPath, pseudoIntensityRangesPath, datasetIDExpected, log) } diff --git a/api/dataimport/internal/converters/pixlfm/import.go b/api/dataimport/internal/converters/pixlfm/import.go index 78f515aa..acb117f7 100644 --- a/api/dataimport/internal/converters/pixlfm/import.go +++ b/api/dataimport/internal/converters/pixlfm/import.go @@ -85,11 +85,13 @@ func (p PIXLFM) Import(importPath string, pseudoIntensityRangesPath string, data rgbuImgDir := fileStructure{} discoImgDir := fileStructure{} + log.Infof("Checking path \"%v\" for FM dataset type", importPath) pathType, err := DetectPIXLFMStructure(importPath) if err != nil { return nil, "", err } + log.Infof("Found path \"%v\" is of type %v", importPath, pathType) if pathType == "DataDrive" { // This is the official way we receive PIXL FM data from Mars // We expect these directories to exist... diff --git a/api/endpoints/Scan.go b/api/endpoints/Scan.go index 84c49d05..34f1439a 100644 --- a/api/endpoints/Scan.go +++ b/api/endpoints/Scan.go @@ -51,6 +51,19 @@ func PutScanData(params apiRouter.ApiHandlerGenericParams) error { s3PathStart := path.Join(filepaths.DatasetUploadRoot, scanId) // NOTE: We overwrite any previous attempts without worry! + existing, err := params.Svcs.FS.ListObjects(destBucket, s3PathStart+"/") + if err == nil && len(existing) > 0 { + // Delete all that exists + msg := fmt.Sprintf("PutScan for \"%v\": Deleting existing file...\n", scanId) + for _, existingItem := range existing { + msg += existingItem + "\n" + if err := params.Svcs.FS.DeleteObject(destBucket, existingItem); err != nil { + return fmt.Errorf("Failed to delete: \"%v\", error: %v", existing, err) + } + } + + params.Svcs.Log.Infof(msg) + } // Read in body zippedData, err := io.ReadAll(params.Request.Body)