diff --git a/enricher/epss/epss.go b/enricher/epss/epss.go
index c3642099f..97e96db32 100644
--- a/enricher/epss/epss.go
+++ b/enricher/epss/epss.go
@@ -1,9 +1,9 @@
 package epss
 
 import (
+	"bufio"
 	"compress/gzip"
 	"context"
-	"encoding/csv"
 	"encoding/json"
 	"fmt"
 	"github.com/google/uuid"
@@ -99,9 +99,14 @@ func (e *Enricher) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c
 	return nil
 }
 
-func (e *Enricher) FetchEnrichment(ctx context.Context, fingerprint driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) {
+// FetchEnrichment converts the gzip-compressed EPSS CSV feed into a stream of
+// JSON-encoded driver.EnrichmentRecord values, one per data row.
+//
+// The fingerprint argument is ignored; a fresh UUID is generated as the hint
+// on every call.
+func (e *Enricher) FetchEnrichment(ctx context.Context, _ driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) {
 	ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/FetchEnrichment")
 	// Force a new hint, to signal updaters that this is new data.
 	newUUID := uuid.New()
 	hint := driver.Fingerprint(newUUID.String())
 	zlog.Info(ctx).Str("hint", string(hint)).Msg("starting fetch")
@@ -139,29 +144,51 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, fingerprint driver.Finge
 	}
 	defer gzipReader.Close()
 
-	csvReader := csv.NewReader(gzipReader)
-	headers, err := csvReader.Read() // Column names
-	if err != nil {
-		return nil, "", fmt.Errorf("failed to read CSV headers: %w", err)
-	}
-
+	scanner := bufio.NewScanner(gzipReader)
+	var headers []string
 	enc := json.NewEncoder(out)
 	totalCVEs := 0
 
-	for {
-		record, err := csvReader.Read()
-		if err == io.EOF {
-			break
+	// The first line that is neither blank nor a "#" comment holds the column names.
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if strings.HasPrefix(line, "#") || line == "" {
+			continue // Skip comment or empty lines
 		}
 
-		if err != nil {
-			return nil, "", fmt.Errorf("failed to read CSV row: %w", err)
+		headers = strings.Split(line, ",")
+		break
+	}
+	if headers == nil {
+		// No header row was found. The csv.Reader this replaces reported that
+		// as an error; keep doing so instead of silently emitting no records.
+		err := scanner.Err()
+		if err == nil {
+			err = io.EOF
+		}
+		return nil, "", fmt.Errorf("failed to read CSV headers: %w", err)
+	}
+
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if strings.HasPrefix(line, "#") || line == "" {
+			continue
+		}
+
+		record := strings.Split(line, ",")
+		if len(record) != len(headers) {
+			zlog.Warn(ctx).Str("line", line).Msg("skipping line with mismatched fields")
+			continue // Skip lines with mismatched number of fields
 		}
 
 		item := make(map[string]string)
 		for i, value := range record {
 			item[headers[i]] = value
 		}
+		enrichment, err := json.Marshal(item)
+		if err != nil {
+			return nil, "", fmt.Errorf("failed to encode enrichment: %w", err)
+		}
 
 		r := driver.EnrichmentRecord{
 			Tags: []string{item["cve"]},
@@ -169,11 +196,16 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, fingerprint driver.Finge
 		}
 
 		if err = enc.Encode(&r); err != nil {
-			return nil, "", fmt.Errorf("encoding enrichment: %w", err)
+			return nil, "", fmt.Errorf("failed to write JSON line to file: %w", err)
 		}
 		totalCVEs++
 	}
 
+	// Scan returns false on read errors as well as at EOF; surface the former.
+	if err := scanner.Err(); err != nil {
+		return nil, "", fmt.Errorf("error reading file: %w", err)
+	}
+
 	zlog.Info(ctx).Int("totalCVEs", totalCVEs).Msg("processed CVEs")
 	if _, err := out.Seek(0, io.SeekStart); err != nil {
 		return nil, hint, fmt.Errorf("unable to reset file pointer: %w", err)
diff --git a/enricher/epss/epss_test.go b/enricher/epss/epss_test.go
index 27d7929e6..a7c7f9beb 100644
--- a/enricher/epss/epss_test.go
+++ b/enricher/epss/epss_test.go
@@ -4,6 +4,7 @@ import (
 	"compress/gzip"
 	"context"
 	"errors"
+	"github.com/quay/claircore/libvuln/driver"
 	"github.com/quay/zlog"
 	"io"
 	"net/http"
@@ -104,6 +105,55 @@ func (tc configTestcase) Run(ctx context.Context) func(*testing.T) {
 	}
 }
 
+// TestFetch exercises FetchEnrichment against the mock feed server.
+func TestFetch(t *testing.T) {
+	t.Parallel()
+	ctx := zlog.Test(context.Background(), t)
+	srv := mockServer(t)
+
+	tt := []fetchTestcase{
+		{
+			Name:  "Fetch OK", // Tests successful fetch and data processing
+			Check: func(t *testing.T, rc io.ReadCloser, fp driver.Fingerprint, err error) {
+				if err != nil {
+					t.Errorf("unexpected error: %v", err)
+					return
+				}
+				// fetchTestcase.Run owns rc and closes it, so it is not closed
+				// here; stop before reading if it is missing.
+				if rc == nil {
+					t.Error("expected non-nil ReadCloser for initial fetch")
+					return
+				}
+				if fp == driver.Fingerprint("") {
+					t.Error("expected non-empty fingerprint")
+				}
+
+				// Further check if data is correctly read and structured
+				data, err := io.ReadAll(rc)
+				if err != nil {
+					t.Errorf("failed to read enrichment data: %v", err)
+					return
+				}
+				t.Logf("enrichment data: %s", string(data))
+			},
+		},
+	}
+
+	for _, tc := range tt {
+		t.Run(tc.Name, tc.Run(ctx, srv))
+	}
+}
+
+// fetchTestcase is one FetchEnrichment scenario: Name labels the subtest,
+// Hint is passed in as the previous fingerprint, and Check (optional)
+// validates the returned values.
+type fetchTestcase struct {
+	Check func(*testing.T, io.ReadCloser, driver.Fingerprint, error)
+	Name  string
+	Hint  string
+}
+
 type configTestcase struct {
 	Config func(interface{}) error
 	Check  func(*testing.T, error)
@@ -139,3 +189,39 @@ func mockServer(t *testing.T) *httptest.Server {
 	t.Cleanup(srv.Close)
 	return srv
 }
+
+// Run returns a subtest that configures an Enricher against srv, calls
+// FetchEnrichment with tc.Hint and hands the results to tc.Check. The returned
+// reader, if any, is closed when the subtest ends.
+func (tc fetchTestcase) Run(ctx context.Context, srv *httptest.Server) func(*testing.T) {
+	return func(t *testing.T) {
+		e := &Enricher{}
+		ctx := zlog.Test(ctx, t)
+		configFunc := func(i interface{}) error {
+			cfg, ok := i.(*Config)
+			if !ok {
+				t.Fatal("expected Config type for i, but got a different type")
+			}
+			u := srv.URL + "/data.csv.gz"
+			cfg.FeedRoot = &u
+			return nil
+		}
+
+		// Configure Enricher with mock server client and custom config
+		if err := e.Configure(ctx, configFunc, srv.Client()); err != nil {
+			t.Errorf("unexpected error: %v", err)
+			return
+		}
+
+		// Run FetchEnrichment and validate the result using Check
+		rc, fp, err := e.FetchEnrichment(ctx, driver.Fingerprint(tc.Hint))
+		if rc != nil {
+			defer rc.Close()
+		}
+		if tc.Check != nil {
+			tc.Check(t, rc, fp, err)
+		} else if err != nil {
+			t.Errorf("unexpected error: %v", err)
+		}
+	}
+}