diff --git a/enricher/epss/epss.go b/enricher/epss/epss.go index 7e4a851e1..90b310364 100644 --- a/enricher/epss/epss.go +++ b/enricher/epss/epss.go @@ -1,20 +1,24 @@ package epss import ( + "compress/gzip" "context" + "encoding/csv" "encoding/json" "fmt" - "github.com/google/uuid" - "github.com/quay/claircore" - "github.com/quay/claircore/libvuln/driver" - "github.com/quay/zlog" "io" "net/http" "net/url" - "os" "path" "strings" "time" + + "github.com/google/uuid" + "github.com/pkg/errors" + "github.com/quay/claircore" + "github.com/quay/claircore/libvuln/driver" + "github.com/quay/claircore/pkg/tmp" + "github.com/quay/zlog" ) var ( @@ -28,6 +32,9 @@ const ( // DefaultFeeds is the default place to look for EPSS feeds. // epss_scores-YYYY-MM-DD.csv.gz needs to be specified to get all data DefaultRootUrl = `https://epss.cyentia.com/` + + // epssName is the name of the enricher + epssName = `clair.epss` ) // Enricher provides EPSS data as enrichments to a VulnerabilityReport. @@ -48,7 +55,7 @@ func (e Enricher) FetchEnrichment(ctx context.Context, fingerprint driver.Finger newUUID := uuid.New() hint := driver.Fingerprint(newUUID.String()) zlog.Info(ctx).Str("hint", string(hint)).Msg("starting fetch") - out, err := os.CreateTemp("", "enricher.epss.gz") + out, err := tmp.NewFile("", "enricher.epss.*.json") if err != nil { return nil, hint, err } @@ -64,42 +71,90 @@ func (e Enricher) FetchEnrichment(ctx context.Context, fingerprint driver.Finger filePath := fmt.Sprintf("epss_scores-%s.csv.gz", formattedDate) e.feedPath = path.Join(DefaultRootUrl, filePath) } - // Make an HTTP GET request to download the .gz file resp, err := http.Get(e.feedPath) if err != nil { return nil, "", fmt.Errorf("failed to fetch file from %s: %w", e.feedPath, err) } defer resp.Body.Close() - // Check for a successful response if resp.StatusCode != http.StatusOK { return nil, "", fmt.Errorf("failed to fetch file: received status %d", resp.StatusCode) } - _, err = io.Copy(out, resp.Body) + gzipReader, err := gzip.NewReader(resp.Body) if err != nil { - out.Close() - return nil, "", fmt.Errorf("failed to write to temporary file: %w", err) + return nil, "", fmt.Errorf("failed to decompress file: %w", err) } + defer gzipReader.Close() - // Reset file pointer to the beginning - _, err = out.Seek(0, io.SeekStart) + csvReader := csv.NewReader(gzipReader) + headers, err := csvReader.Read() // read headers if err != nil { - out.Close() - return nil, "", fmt.Errorf("failed to reset file pointer: %w", err) + return nil, "", fmt.Errorf("failed to read CSV headers: %w", err) + } + + for { + record, err := csvReader.Read() + if err == io.EOF { + break + } + if err != nil { + return nil, "", fmt.Errorf("failed to read CSV row: %w", err) + } + + // Convert CSV row to a JSON object + jsonObject := make(map[string]string) + for i, value := range record { + jsonObject[headers[i]] = value + } + + jsonLine, err := json.Marshal(jsonObject) + if err != nil { + return nil, "", fmt.Errorf("failed to encode JSON: %w", err) + } + + if _, err := out.Write(jsonLine); err != nil { + return nil, "", fmt.Errorf("failed to write JSON line to file: %w", err) + } + if _, err := out.WriteString("\n"); err != nil { // newline for each JSON object + return nil, "", fmt.Errorf("failed to write newline to file: %w", err) + } + } + + if _, err := out.Seek(0, io.SeekStart); err != nil { + return nil, hint, fmt.Errorf("unable to reset file pointer: %w", err) } - success = true + return out, hint, nil } -func (e Enricher) ParseEnrichment(ctx context.Context, closer io.ReadCloser) ([]driver.EnrichmentRecord, error) { - //TODO implement me - panic("implement me") +// ParseEnrichment implements driver.EnrichmentUpdater. +func (e *Enricher) ParseEnrichment(ctx context.Context, rc io.ReadCloser) ([]driver.EnrichmentRecord, error) { + ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/ParseEnrichment") + // Our Fetch method actually has all the smarts w/r/t to constructing the + // records, so this is just decoding in a loop. + defer func() { + _ = rc.Close() + }() + var err error + dec := json.NewDecoder(rc) + ret := make([]driver.EnrichmentRecord, 0, 250_000) // Wild guess at initial capacity. + // This is going to allocate like mad, hold onto your butts. + for err == nil { + ret = append(ret, driver.EnrichmentRecord{}) + err = dec.Decode(&ret[len(ret)-1]) + } + zlog.Debug(ctx). + Int("count", len(ret)-1). + Msg("decoded enrichments") + if !errors.Is(err, io.EOF) { + return nil, err + } + return ret, nil } -func (e Enricher) Name() string { - //TODO implement me - panic("implement me") +func (*Enricher) Name() string { + return epssName } func (e Enricher) Enrich(ctx context.Context, getter driver.EnrichmentGetter, report *claircore.VulnerabilityReport) (string, []json.RawMessage, error) {