Skip to content

Commit

Permalink
epss: make file reader more generic
Browse files Browse the repository at this point in the history
Signed-off-by: daynewlee <[email protected]>
  • Loading branch information
daynewlee committed Nov 14, 2024
1 parent 517a19c commit d1bd62b
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 16 deletions.
48 changes: 32 additions & 16 deletions enricher/epss/epss.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package epss

import (
"bufio"
"compress/gzip"
"context"
"encoding/csv"
"encoding/json"
"fmt"
"github.com/google/uuid"
Expand Down Expand Up @@ -99,9 +99,8 @@ func (e *Enricher) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c
return nil
}

func (e *Enricher) FetchEnrichment(ctx context.Context, fingerprint driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) {
func (e *Enricher) FetchEnrichment(ctx context.Context, _ driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) {
ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/FetchEnrichment")
// Force a new hint, to signal updaters that this is new data.
newUUID := uuid.New()
hint := driver.Fingerprint(newUUID.String())
zlog.Info(ctx).Str("hint", string(hint)).Msg("starting fetch")
Expand Down Expand Up @@ -139,41 +138,58 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, fingerprint driver.Finge
}
defer gzipReader.Close()

csvReader := csv.NewReader(gzipReader)
headers, err := csvReader.Read() // Column names
if err != nil {
return nil, "", fmt.Errorf("failed to read CSV headers: %w", err)
}

scanner := bufio.NewScanner(gzipReader)
var headers []string
enc := json.NewEncoder(out)
totalCVEs := 0

for {
record, err := csvReader.Read()
if err == io.EOF {
break
// get headers
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if strings.HasPrefix(line, "#") || line == "" {
continue // Skip comment or empty lines
}
if err != nil {
return nil, "", fmt.Errorf("failed to read CSV row: %w", err)
headers = strings.Split(line, ",")
break
}

for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if strings.HasPrefix(line, "#") || line == "" {
continue
}

record := strings.Split(line, ",")
if len(record) != len(headers) {
zlog.Warn(ctx).Str("line", line).Msg("skipping line with mismatched fields")
continue // Skip lines with mismatched number of fields
}

item := make(map[string]string)
for i, value := range record {
item[headers[i]] = value
}

enrichment, err := json.Marshal(item)
if err != nil {
return nil, "", fmt.Errorf("failed to encode enrichment: %w", err)
}

r := driver.EnrichmentRecord{
Tags: []string{item["cve"]},
Enrichment: enrichment,
}

if err = enc.Encode(&r); err != nil {
return nil, "", fmt.Errorf("encoding enrichment: %w", err)
return nil, "", fmt.Errorf("failed to write JSON line to file: %w", err)
}
totalCVEs++
}

if err := scanner.Err(); err != nil {
return nil, "", fmt.Errorf("error reading file: %w", err)
}

zlog.Info(ctx).Int("totalCVEs", totalCVEs).Msg("processed CVEs")
if _, err := out.Seek(0, io.SeekStart); err != nil {
return nil, hint, fmt.Errorf("unable to reset file pointer: %w", err)
Expand Down
76 changes: 76 additions & 0 deletions enricher/epss/epss_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"compress/gzip"
"context"
"errors"
"github.com/quay/claircore/libvuln/driver"
"github.com/quay/zlog"
"io"
"net/http"
Expand Down Expand Up @@ -104,6 +105,48 @@ func (tc configTestcase) Run(ctx context.Context) func(*testing.T) {
}
}

// TestFetch runs FetchEnrichment against the mock server and verifies that a
// successful fetch returns no error, a non-nil readable body, and a non-empty
// fingerprint.
func TestFetch(t *testing.T) {
	t.Parallel()
	ctx := zlog.Test(context.Background(), t)
	srv := mockServer(t)

	tt := []fetchTestcase{
		{
			Name: "Fetch OK", // Tests successful fetch and data processing
			Check: func(t *testing.T, rc io.ReadCloser, fp driver.Fingerprint, err error) {
				if err != nil {
					t.Errorf("unexpected error: %v", err)
					return
				}
				// The nil check must come before any use of rc: deferring
				// rc.Close() or reading from a nil ReadCloser would panic
				// instead of reporting a test failure.
				if rc == nil {
					t.Error("expected non-nil ReadCloser for initial fetch")
					return
				}
				defer rc.Close()
				if fp == driver.Fingerprint("") {
					t.Error("expected non-empty fingerprint")
				}

				// Further check if data is correctly read and structured
				data, err := io.ReadAll(rc)
				if err != nil {
					t.Errorf("failed to read enrichment data: %v", err)
					return
				}
				t.Logf("enrichment data: %s", string(data))
			},
		},
	}

	for _, tc := range tt {
		t.Run(tc.Name, tc.Run(ctx, srv))
	}
}

// fetchTestcase describes a single FetchEnrichment scenario executed by
// fetchTestcase.Run.
type fetchTestcase struct {
// Check, when non-nil, receives the three values returned by
// FetchEnrichment and performs the assertions for this case.
Check func(*testing.T, io.ReadCloser, driver.Fingerprint, error)
// Name is used as the subtest name.
Name string
// Hint is converted to a driver.Fingerprint and passed to FetchEnrichment.
Hint string
}

type configTestcase struct {
Config func(interface{}) error
Check func(*testing.T, error)
Expand Down Expand Up @@ -139,3 +182,36 @@ func mockServer(t *testing.T) *httptest.Server {
t.Cleanup(srv.Close)
return srv
}

// Run builds the subtest function for this testcase. The subtest configures a
// fresh Enricher to read its feed from "/data.csv.gz" on srv using srv's HTTP
// client, calls FetchEnrichment with tc.Hint as the fingerprint, and passes
// the results to tc.Check. When Check is nil, any fetch error fails the test.
func (tc fetchTestcase) Run(ctx context.Context, srv *httptest.Server) func(*testing.T) {
	return func(t *testing.T) {
		ctx := zlog.Test(ctx, t)
		feedURL := srv.URL + "/data.csv.gz"

		// Config hook: point the enricher's feed root at the mock server.
		unmarshal := func(v interface{}) error {
			cfg, ok := v.(*Config)
			if !ok {
				t.Fatal("expected Config type for i, but got a different type")
			}
			cfg.FeedRoot = &feedURL
			return nil
		}

		enricher := new(Enricher)
		if err := enricher.Configure(ctx, unmarshal, srv.Client()); err != nil {
			t.Errorf("unexpected error: %v", err)
			return
		}

		// Fetch, making sure any returned body is closed when the subtest ends.
		rc, fp, err := enricher.FetchEnrichment(ctx, driver.Fingerprint(tc.Hint))
		if rc != nil {
			defer rc.Close()
		}

		switch {
		case tc.Check != nil:
			tc.Check(t, rc, fp, err)
		case err != nil:
			t.Errorf("unexpected error: %v", err)
		}
	}
}

0 comments on commit d1bd62b

Please sign in to comment.