diff --git a/api/dataimport/dataimportHelpers/beamLocation.go b/api/dataimport/dataimportHelpers/beamLocation.go index 41833b09..77d5befb 100644 --- a/api/dataimport/dataimportHelpers/beamLocation.go +++ b/api/dataimport/dataimportHelpers/beamLocation.go @@ -18,8 +18,10 @@ package dataImportHelpers import ( + "bufio" "errors" "fmt" + "os" "slices" "strconv" "strings" @@ -29,14 +31,35 @@ import ( ) // ReadBeamLocationsFile - Reads beam location CSV. Old style (expectMultipleIJ=false) or new multi-image IJ coord CSVs. -func ReadBeamLocationsFile(path string, expectMultipleIJ bool, mainImagePMC int32, ignoreColumns []string, jobLog logger.ILogger) (dataConvertModels.BeamLocationByPMC, error) { - rowsToSkip := 0 - if !expectMultipleIJ { - // If we're loading the old style test data, that had an extra header that we skip - rowsToSkip = 1 +func ReadBeamLocationsFile(beamPath string, expectMultipleIJ bool, mainImagePMC int32, ignoreColumns []string, jobLog logger.ILogger) (dataConvertModels.BeamLocationByPMC, error) { + // Find the first row that has the start of data we're interested in! + file, err := os.Open(beamPath) + if err != nil { + return nil, err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + + lineNo := 0 + for scanner.Scan() { + line := scanner.Text() + lineNo++ + + if strings.HasPrefix(line, "PMC,") { + break + } + + if lineNo > 4 { + return nil, fmt.Errorf("Failed to find header row of beam location data") + } } + // read CSV - rows, err := ReadCSV(path, rowsToSkip, ',', jobLog) + if lineNo > 0 { + lineNo-- + } + rows, err := ReadCSV(beamPath, lineNo, ',', jobLog) if err != nil { return nil, err } diff --git a/api/dataimport/dataimportHelpers/readcsv.go b/api/dataimport/dataimportHelpers/readcsv.go index 1e48b1de..c969e5b5 100644 --- a/api/dataimport/dataimportHelpers/readcsv.go +++ b/api/dataimport/dataimportHelpers/readcsv.go @@ -16,6 +16,7 @@ func ReadCSV(filePath string, headerIdx int, sep rune, jobLog logger.ILogger) ([ return nil, err } + seekPos := int64(0) if headerIdx > 0 { n := 0 for n < headerIdx { @@ -24,7 +25,8 @@ func ReadCSV(filePath string, headerIdx int, sep rune, jobLog logger.ILogger) ([ if err != nil { return nil, err } - _, err = csvFile.Seek(int64(len(row1)), io.SeekStart) + seekPos += int64(len(row1)) + _, err = csvFile.Seek(seekPos, io.SeekStart) if err != nil { return nil, err } diff --git a/api/dataimport/internal/converters/jplbreadboard/contextImages.go b/api/dataimport/internal/converters/jplbreadboard/contextImages.go index 2c3d758f..a009baf8 100644 --- a/api/dataimport/internal/converters/jplbreadboard/contextImages.go +++ b/api/dataimport/internal/converters/jplbreadboard/contextImages.go @@ -18,10 +18,7 @@ package jplbreadboard import ( - "path/filepath" - "strconv" - "strings" - + "github.com/pixlise/core/v4/api/dataimport/internal/importerutils" "github.com/pixlise/core/v4/core/fileaccess" "github.com/pixlise/core/v4/core/logger" ) @@ -35,30 +32,5 @@ func processContextImages(path string, jobLog logger.ILogger, fs fileaccess.File return nil, err } - return getContextImagesPerPMCFromListing(contextImgDirFiles, jobLog), nil -} - -func getContextImagesPerPMCFromListing(paths []string, jobLog logger.ILogger) map[int32]string { - result := make(map[int32]string) - - for _, pathitem := range paths { - _, file := filepath.Split(pathitem) - extension := filepath.Ext(file) - if extension == ".jpg" { - fileNameBits := strings.Split(file, "_") - if len(fileNameBits) != 3 { - jobLog.Infof("Ignored unexpected image file name \"%v\" when searching for context images.", pathitem) - } else { - pmcStr := fileNameBits[len(fileNameBits)-1] - pmcStr = pmcStr[0 : len(pmcStr)-len(extension)] - pmcI, err := strconv.Atoi(pmcStr) - if err != nil { - jobLog.Infof("Ignored unexpected image file name \"%v\", couldn't parse PMC.", pathitem) - } else { - result[int32(pmcI)] = file - } - } - } - } - return result + return importerutils.GetContextImagesPerPMCFromListing(contextImgDirFiles, jobLog), nil } diff --git a/api/dataimport/internal/converters/jplbreadboard/import.go b/api/dataimport/internal/converters/jplbreadboard/import.go index 90d46487..04a660e6 100644 --- a/api/dataimport/internal/converters/jplbreadboard/import.go +++ b/api/dataimport/internal/converters/jplbreadboard/import.go @@ -93,7 +93,7 @@ func (m MSATestData) Import(importPath string, pseudoIntensityRangesPath string, } } - minContextPMC := getMinimumContextPMC(contextImgsPerPMC) + minContextPMC := importerutils.GetMinimumContextPMC(contextImgsPerPMC) var hkData dataConvertModels.HousekeepingData var beamLookup = make(dataConvertModels.BeamLocationByPMC) @@ -153,7 +153,7 @@ func (m MSATestData) Import(importPath string, pseudoIntensityRangesPath string, spectrafiles, _ := getSpectraFiles(allMSAFiles, verifyreadtype, jobLog) jobLog.Infof(" Found %v usable spectrum files...", len(allMSAFiles)) - spectraLookup, err := makeSpectraLookup(spectraPath, spectrafiles, params.SingleDetectorMSAs, params.GenPMCs, params.ReadTypeOverride, params.DetectorADuplicate, jobLog) + spectraLookup, err := MakeSpectraLookup(spectraPath, spectrafiles, params.SingleDetectorMSAs, params.GenPMCs, params.ReadTypeOverride, params.DetectorADuplicate, jobLog) if err != nil { return nil, "", err } @@ -236,19 +236,3 @@ func (m MSATestData) Import(importPath string, pseudoIntensityRangesPath string, data.SetPMCData(beamLookup, hkData, spectraLookup, contextImgsPerPMC, pseudoIntensityData, map[int32]string{}) return data, contextImageSrcDir, nil } - -// Check what the minimum PMC is we have a context image for -func getMinimumContextPMC(contextImgsPerPMC map[int32]string) int32 { - minContextPMC := int32(0) - - for contextPMC := range contextImgsPerPMC { - if minContextPMC == 0 || contextPMC < minContextPMC { - minContextPMC = contextPMC - } - } - if minContextPMC == 0 { - minContextPMC = 1 - } - - return minContextPMC -} diff --git a/api/dataimport/internal/converters/jplbreadboard/spectra.go b/api/dataimport/internal/converters/jplbreadboard/spectra.go index 833c68c2..781de95c 100644 --- a/api/dataimport/internal/converters/jplbreadboard/spectra.go +++ b/api/dataimport/internal/converters/jplbreadboard/spectra.go @@ -122,7 +122,7 @@ func getMSASeqNo(path string) (int64, error) { return int64(seqNo), nil } -func makeSpectraLookup(inputpath string, spectraFiles []string, singleDetectorMSAs bool, genPMCs bool, readTypeOverride string, detectorADuplicate bool, jobLog logger.ILogger) (dataConvertModels.DetectorSampleByPMC, error) { +func MakeSpectraLookup(inputpath string, spectraFiles []string, singleDetectorMSAs bool, genPMCs bool, readTypeOverride string, detectorADuplicate bool, jobLog logger.ILogger) (dataConvertModels.DetectorSampleByPMC, error) { spectraLookup := make(dataConvertModels.DetectorSampleByPMC) reportInterval := len(spectraFiles) / 10 @@ -168,7 +168,7 @@ func makeSpectraLookup(inputpath string, spectraFiles []string, singleDetectorMS return spectraLookup, fmt.Errorf("Unexpected SOURCEFILE metadata already defined in %v", path) } - s.Meta["SOURCEFILE"] = dataConvertModels.StringMetaValue(f) + s.Meta["SOURCEFILE"] = dataConvertModels.StringMetaValue(filepath.Base(f)) // Use the override if it's provided rt := readTypeOverride diff --git a/api/dataimport/internal/converters/pixlem/import.go b/api/dataimport/internal/converters/pixlem/import.go index 17ddad4e..f602fecf 100644 --- a/api/dataimport/internal/converters/pixlem/import.go +++ b/api/dataimport/internal/converters/pixlem/import.go @@ -18,14 +18,20 @@ package pixlem import ( - "errors" "fmt" "os" "path/filepath" + "strconv" + "strings" - "github.com/pixlise/core/v4/api/dataimport/internal/converters/pixlfm" + dataImportHelpers "github.com/pixlise/core/v4/api/dataimport/dataimportHelpers" + "github.com/pixlise/core/v4/api/dataimport/internal/converters/jplbreadboard" "github.com/pixlise/core/v4/api/dataimport/internal/dataConvertModels" + "github.com/pixlise/core/v4/api/dataimport/internal/importerutils" + "github.com/pixlise/core/v4/core/fileaccess" + "github.com/pixlise/core/v4/core/gdsfilename" "github.com/pixlise/core/v4/core/logger" + "github.com/pixlise/core/v4/core/utils" protos "github.com/pixlise/core/v4/generated-protos" ) @@ -37,31 +43,219 @@ type PIXLEM struct { } func (p PIXLEM) Import(importPath string, pseudoIntensityRangesPath string, datasetIDExpected string, log logger.ILogger) (*dataConvertModels.OutputData, string, error) { - // Find the subdir - subdir := "" - - c, _ := os.ReadDir(importPath) - for _, entry := range c { - if entry.IsDir() { - // If it's not the first one, we can't do this - if len(subdir) > 0 { - return nil, "", fmt.Errorf("Found multiple subdirs (\"%v\", \"%v\"), expected one in: \"%v\"", subdir, entry.Name(), importPath) + log.Infof("PIXL EM Import started for path: %v", importPath) + + fs := fileaccess.FSAccess{} + + // PIXL EM has evolved over time from being a FM-like dataset, or a bunch of MSA files, to us importing the SDF-Peek output format. We run the beam geometry tool ourselves + // when the dataset is imported, and here we're expecting the sdf peek output zip file, and the processed files we generate to make these ready to import. + // Check expected files exist + + msaFiles, err := utils.ReadFileLines(filepath.Join(importPath, "msas.txt")) + if err != nil { + log.Errorf("%v", err) + return nil, "", err + } + + imageFiles, err := utils.ReadFileLines(filepath.Join(importPath, "images.txt")) + if err != nil { + log.Errorf("%v", err) + return nil, "", err + } + + // Read all beam location files + beamLocPrefix := "beamLocation-" + beams, err := fs.ListObjects(importPath, beamLocPrefix) + + if err != nil || len(beams) <= 0 { + if err == nil { + err = fmt.Errorf("Failed to find beam location file(s)") + } + + log.Errorf("%v", err) + return nil, "", err + } + + // Extract RTTs from each beam location file name and import a dataset for each RTT + zipName, err := extractZipName(append(msaFiles, imageFiles...)) + if err != nil { + return nil, "", err + } + + for _, beamName := range beams { + log.Infof("Reading beam location file: %v", beamName) + + rttStr := beamName[len(beamLocPrefix) : len(beamName)-4] + rtt, err := strconv.Atoi(rttStr) + if err != nil { + err = fmt.Errorf("Failed to read rtt from file name: %v. Error: %v", beamName, err) + log.Infof("%v", err) + return nil, "", err + } + + // User may have specified RTT as hex or int, when we're checking which to import, check both ways + rttHex := fmt.Sprintf("%X", rtt) + if datasetIDExpected != rttStr && !strings.HasSuffix(datasetIDExpected, rttHex) { + log.Infof("Skipping beam location file: %v, RTT doesn't match expected: %v", beamName, datasetIDExpected) + continue + } + + if len(rttHex) < 8 { + rttHex = "0" + rttHex + } + rttHex = "_" + rttHex + "_" + + imageList := []string{} + for _, img := range imageFiles { + // Expecting image file names of the form: 0720239657_0C6E0205_000002.jpg + // The second part is the RTT, so we convert our RTT to hex to compare + + if strings.Contains(img, rttHex) { + fullPath := filepath.Join(importPath, zipName, img) + imageList = append(imageList, fullPath) + } + } + + msaList := []string{} + bulkMaxList := []string{} + for _, msa := range msaFiles { + // Expecting image file names of the form: 0720239657_0C6E0205_000002.jpg + // The second part is the RTT, so we convert our RTT to hex to compare + + if strings.Contains(msa, rttHex) { + + fullPath := filepath.Join(importPath, zipName, msa) + fileName := filepath.Base(msa) + + if strings.HasPrefix(fileName, "BulkSum_") || strings.HasPrefix(fileName, "MaxValue_") { + bulkMaxList = append(bulkMaxList, fullPath) + } else { + msaList = append(msaList, fullPath) + } } - subdir = entry.Name() + } + + beamPath := filepath.Join(importPath, beamName) + data, err := importEMData(rttStr, beamPath, imageList, bulkMaxList, msaList, &fs, log) + if err != nil { + log.Errorf("Import failed for %v: %v", beamName, err) + continue + } + + log.Infof("Imported scan with RTT: %v", rtt) + return data, filepath.Join(importPath, zipName, zipName), nil + } + + // If we got here, nothing was imported + return nil, "", fmt.Errorf("Expected RTT %v was not found in uploaded data", datasetIDExpected) +} + +func extractZipName(files []string) (string, error) { + zipName := "" + pathSep := string(os.PathSeparator) + + // Unfortunately when unzipped, the zip file name ends up in the path again... so we have to add it here. Once we have the zip name, + // verify all files in the list start with it + for _, f := range files { + if len(zipName) <= 0 { + pos := strings.Index(f, pathSep) + if pos == -1 { + pos = strings.Index(f, fmt.Sprintf("%v", "/")) + } + if pos > 0 { + zipName = f[0:pos] + } else { + return "", fmt.Errorf("Failed to read path root for PIXL EM importable files from: %v", f) + } + } else { + if !strings.HasPrefix(f, zipName) { + return "", fmt.Errorf("Error while reading importable files for PIXL EM: Expected path %v to start with %v", f, zipName) + } + } + } + + return zipName, nil +} +func importEMData(rtt string, beamLocPath string, imagePathList []string, bulkMaxList []string, msaList []string, fs fileaccess.FileAccess, logger logger.ILogger) (*dataConvertModels.OutputData, error) { + // Read MSAs + locSpectraLookup, err := jplbreadboard.MakeSpectraLookup("", msaList, true, false, "", false, logger) + if err != nil { + return nil, err + } + + bulkMaxSpectraLookup, err := jplbreadboard.MakeSpectraLookup("", bulkMaxList, true, false, "", false, logger) + if err != nil { + return nil, err + } + + // Read Images + contextImgsPerPMC := importerutils.GetContextImagesPerPMCFromListing(imagePathList, logger) + minContextPMC := importerutils.GetMinimumContextPMC(contextImgsPerPMC) + + // Read Beams + beamLookup, err := dataImportHelpers.ReadBeamLocationsFile(beamLocPath, true, minContextPMC, []string{"drift_x", "drift_y", "drift_z"}, logger) + if err != nil { + return nil, err + } + + // We don't have everything a full FM dataset would have... + var hkData dataConvertModels.HousekeepingData + var pseudoIntensityData dataConvertModels.PseudoIntensities + var pseudoIntensityRanges []dataConvertModels.PseudoIntensityRange + var matchedAlignedImages []dataConvertModels.MatchedAlignedImageMeta + + site := "000" + drive := "0000" + product := "???" + sol := "D000" + ftype := "??" // PE + producer := "J" + version := "01" + + // Grab the SCLK from the lowest PMC image + minPMC := int32(-1) + minFileName := "" + for pmc, img := range contextImgsPerPMC { + if minPMC < 0 || pmc < minPMC { + minPMC = pmc + minFileName = img } } - if len(subdir) <= 0 { - return nil, "", errors.New("Failed to find PIXL data subdir in: " + importPath) + if len(minFileName) <= 0 { + return nil, fmt.Errorf("Failed to find SCLK to use") } - // Form the actual path to the files - subImportPath := filepath.Join(importPath, subdir) - fmImporter := pixlfm.PIXLFM{} + parts := strings.Split(minFileName, "_") + if len(parts) != 3 { + return nil, fmt.Errorf("Unexpected image name format: %v", minFileName) + } + + sclk := parts[0] - // Override importers group and detector - fmImporter.SetOverrides(protos.ScanInstrument_PIXL_EM, "PIXL-EM-E2E") + fakeFileName := fmt.Sprintf("%v__%v_%v_000%v_N%v%v%v_______%v%v.CSV", ftype, sol, sclk, product, site, drive, rtt, producer, version) + housekeepingFileNameMeta, err := gdsfilename.ParseFileName(fakeFileName) + if err != nil { + return nil, err + } - // Now we can import it like normal - return fmImporter.Import(subImportPath, pseudoIntensityRangesPath, datasetIDExpected, log) + return importerutils.MakeFMDatasetOutput( + beamLookup, + hkData, + locSpectraLookup, + bulkMaxSpectraLookup, + contextImgsPerPMC, + pseudoIntensityData, + pseudoIntensityRanges, + matchedAlignedImages, + []dataConvertModels.ImageMeta{}, + []dataConvertModels.ImageMeta{}, + "", + housekeepingFileNameMeta, + rtt, + protos.ScanInstrument_PIXL_EM, + "", + uint32(3), + logger, + ) } diff --git a/api/dataimport/internal/importerutils/contextImages.go b/api/dataimport/internal/importerutils/contextImages.go new file mode 100644 index 00000000..38bd9dc6 --- /dev/null +++ b/api/dataimport/internal/importerutils/contextImages.go @@ -0,0 +1,67 @@ +// Licensed to NASA JPL under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. NASA JPL licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package importerutils + +import ( + "path/filepath" + "strconv" + "strings" + + "github.com/pixlise/core/v4/core/logger" +) + +// Check what the minimum PMC is we have a context image for +func GetMinimumContextPMC(contextImgsPerPMC map[int32]string) int32 { + minContextPMC := int32(0) + + for contextPMC := range contextImgsPerPMC { + if minContextPMC == 0 || contextPMC < minContextPMC { + minContextPMC = contextPMC + } + } + if minContextPMC == 0 { + minContextPMC = 1 + } + + return minContextPMC +} + +func GetContextImagesPerPMCFromListing(paths []string, jobLog logger.ILogger) map[int32]string { + result := make(map[int32]string) + + for _, pathitem := range paths { + _, file := filepath.Split(pathitem) + extension := filepath.Ext(file) + if extension == ".jpg" { + fileNameBits := strings.Split(file, "_") + if len(fileNameBits) != 3 { + jobLog.Infof("Ignored unexpected image file name \"%v\" when searching for context images.", pathitem) + } else { + pmcStr := fileNameBits[len(fileNameBits)-1] + pmcStr = pmcStr[0 : len(pmcStr)-len(extension)] + pmcI, err := strconv.Atoi(pmcStr) + if err != nil { + jobLog.Infof("Ignored unexpected image file name \"%v\", couldn't parse PMC.", pathitem) + } else { + result[int32(pmcI)] = file + } + } + } + } + return result +} diff --git a/api/dataimport/internal/converters/jplbreadboard/contextImages_test.go b/api/dataimport/internal/importerutils/contextImages_test.go similarity index 94% rename from api/dataimport/internal/converters/jplbreadboard/contextImages_test.go rename to api/dataimport/internal/importerutils/contextImages_test.go index beaf5bf6..48e1063f 100644 --- a/api/dataimport/internal/converters/jplbreadboard/contextImages_test.go +++ b/api/dataimport/internal/importerutils/contextImages_test.go @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -package jplbreadboard +package importerutils import ( "fmt" @@ -24,9 +24,9 @@ import ( "github.com/pixlise/core/v4/core/logger" ) -func Example_getContextImagesPerPMCFromListing() { +func Example_GetContextImagesPerPMCFromListing() { listing := []string{"../datasets/FM_5x5/P13177_5x5_190602/hk.txt", "../datasets/FM_5x5/P13177_5x5_190602/0612747347_000001C5_005048.jpg", "../datasets/FM_5x5/P13177_5x5_190602/0612756313_000001C5_005651.jpg", "../datasets/FM_5x5/P13177_5x5_190602/.DS_Store", "../datasets/FM_5x5/P13177_5x5_190602/0612744373_000001C5_004847.jpg", "../datasets/FM_5x5/P13177_5x5_190602/0612735422_000001C5_004244.jpg", "../datasets/FM_5x5/P13177_5x5_190602/0612758681_000001C5_005807.jpg", "../datasets/FM_5x5/P13177_5x5_190602/0612741400_000001C5_004646.jpg", "../datasets/FM_5x5/P13177_5x5_190602/hk_data.csv", "../datasets/FM_5x5/P13177_5x5_190602/0612732390_000001C5_004042.jpg", "../datasets/FM_5x5/P13177_5x5_190602/0612750353_000001C5_005249.jpg", "../datasets/FM_5x5/P13177_5x5_190602/0612738397_000001C5_004445.jpg", "../datasets/FM_5x5/P13177_5x5_190602/0612753334_000001C5_005450.jpg"} - results := getContextImagesPerPMCFromListing(listing, &logger.StdOutLogger{}) + results := GetContextImagesPerPMCFromListing(listing, &logger.StdOutLogger{}) fmt.Printf("length: %d\n", len(results)) diff --git a/api/ws/handlers/scan.go b/api/ws/handlers/scan.go index 258bf43c..0551f0f7 100644 --- a/api/ws/handlers/scan.go +++ b/api/ws/handlers/scan.go @@ -648,8 +648,8 @@ func processEM(importId string, zipReader *zip.Reader, zippedData []byte, destBu } func createBeamLocation(rsiPath string, rtt int64, outputBeamLocationPath string, logger logger.ILogger) (string, string, string, error) { - outSurfaceTop := filepath.Join(outputBeamLocationPath, fmt.Sprintf("surface_top-%v.txt", rtt)) - outRXL := filepath.Join(outputBeamLocationPath, fmt.Sprintf("beam_location_RXL-%v.txt", rtt)) + outSurfaceTop := filepath.Join(outputBeamLocationPath, fmt.Sprintf("surfaceTop-%v.txt", rtt)) + outRXL := filepath.Join(outputBeamLocationPath, fmt.Sprintf("beamLocation-%v.csv", rtt)) outLog := filepath.Join(outputBeamLocationPath, fmt.Sprintf("log-%v.txt", rtt)) logger.Infof("Generating beam location CSV from: %v", rsiPath) diff --git a/core/utils/images.go b/core/utils/images.go index 35812483..b316e95c 100644 --- a/core/utils/images.go +++ b/core/utils/images.go @@ -21,6 +21,7 @@ import ( "bytes" "fmt" "image" + _ "image/jpeg" "image/png" "os" "strings"