Skip to content

Commit

Permalink
cpe: speed up dictionary test
Browse files Browse the repository at this point in the history
This speeds up the "Dictionary" test by pre-processing the CPE XML
Dictionary into a flat list structure. Doing this speeds up the test
(both normal and under the race detector) by about 3x.

Signed-off-by: Hank Donnay <[email protected]>
  • Loading branch information
hdonnay committed Jul 30, 2024
1 parent 4bfe535 commit 483f858
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 53 deletions.
7 changes: 7 additions & 0 deletions toolkit/types/cpe/generate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package cpe

//go:generate -command stringer go run golang.org/x/tools/cmd/stringer@latest
//go:generate stringer -type Attribute -linecomment
//go:generate stringer -type ValueKind
//go:generate stringer -type Relation -linecomment
//go:generate go run mkdict.go
2 changes: 0 additions & 2 deletions toolkit/types/cpe/match.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,6 @@ func (rs Relations) IsDisjoint() bool {
// Relation indicates the relation of two WFN attributes.
type Relation uint

//go:generate stringer -type Relation -linecomment

// These are the possible relations between WFNs and their components.
//
// The super- and sub-sets indicate the conventional sense, meaning a set is
Expand Down
104 changes: 104 additions & 0 deletions toolkit/types/cpe/mkdict.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
//go:build ignore

// Mkdict is a script to generate the test data read by TestDictionary
// (testdata/dictionary.list.gz by default) from the official CPE Dictionary.
package main

import (
"compress/gzip"
"context"
"encoding/xml"
"errors"
"flag"
"fmt"
"io"
"log/slog"
"net/http"
"os"
)

const dictURL = `https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz`

// main parses the command line, creates the output file, and hands it to
// Main to be filled with the processed CPE dictionary.
//
// The "-o" flag names the output file and defaults to
// "testdata/dictionary.list.gz". Any failure is logged and results in a
// process exit status of 1.
func main() {
	// The exit status is applied from a deferred function so that the other
	// deferred calls (which run first, in LIFO order) still execute before
	// os.Exit terminates the process.
	var code int
	defer func() {
		if code != 0 {
			os.Exit(code)
		}
	}()
	outfile := flag.String("o", "testdata/dictionary.list.gz", "output file")
	flag.Parse()
	ctx := context.Background()

	out, err := os.Create(*outfile)
	if err != nil {
		slog.Error("unable to open out file", "error", err)
		code = 1
		return
	}
	// A failed Close on a file that was just written means the output may be
	// incomplete, so it must be reported rather than silently dropped.
	defer func() {
		if err := out.Close(); err != nil {
			slog.Error("unable to close out file", "error", err)
			code = 1
		}
	}()

	if err := Main(ctx, out); err != nil {
		slog.Error("error processing CPE dictionary", "error", err)
		code = 1
		return
	}
}

// Main downloads the gzipped CPE dictionary from dictURL and writes a
// gzip-compressed, flattened list to out.
//
// For every "cpe-item" element the value of its "name" attribute is written
// followed by a tab; for every "cpe23-item" element the value of its "name"
// attribute is written followed by a newline. With the two elements paired in
// the input, each output line is "<cpe-item name>\t<cpe23-item name>".
//
// An error is returned if the request fails, the response status is not
// 200 OK, the input cannot be decoded, a relevant element has no "name"
// attribute, or the output cannot be written or finalized.
func Main(ctx context.Context, out *os.File) (err error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, dictURL, nil)
	if err != nil {
		return err
	}

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected response: %s", res.Status)
	}
	inGz, err := gzip.NewReader(res.Body)
	if err != nil {
		return err
	}
	defer inGz.Close()

	outGz, err := gzip.NewWriterLevel(out, gzip.BestCompression)
	if err != nil {
		return err
	}
	// Close flushes the remaining compressed data and writes the gzip
	// footer; if it fails the output is unusable, so surface that error
	// alongside whatever the loop below returned.
	defer func() {
		err = errors.Join(err, outGz.Close())
	}()

	// This is a brittle loop that assumes the two versions of a bound CPE will
	// be paired. This should be true of the dictionary if it's adhering to the
	// schema.
	//
	// Doing it this way is significantly faster due to eliminating a bunch of
	// book-keeping allocations.

	dec := xml.NewDecoder(inGz)
	for {
		tok, err := dec.RawToken()
		switch {
		case err == nil:
		case errors.Is(err, io.EOF):
			return nil
		default:
			return err
		}
		start, ok := tok.(xml.StartElement)
		if !ok {
			continue
		}
		// The separator written after the name depends on which half of
		// the pair this element is.
		var sep string
		switch start.Name.Local {
		case "cpe-item":
			sep = "\t"
		case "cpe23-item":
			sep = "\n"
		default:
			continue
		}
		name, ok := nameAttr(start)
		if !ok {
			return fmt.Errorf("%s element ending at offset %d has no name attribute",
				start.Name.Local, dec.InputOffset())
		}
		if _, err := io.WriteString(outGz, name+sep); err != nil {
			return err
		}
	}
}

// nameAttr reports the value of the "name" attribute of el, and whether the
// attribute was present.
func nameAttr(el xml.StartElement) (string, bool) {
	for _, a := range el.Attr {
		if a.Name.Local == "name" {
			return a.Value, true
		}
	}
	return "", false
}
Binary file added toolkit/types/cpe/testdata/dictionary.list.gz
Binary file not shown.
Binary file not shown.
4 changes: 0 additions & 4 deletions toolkit/types/cpe/wfn.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@ import (
"unicode/utf8"
)

//go:generate -command stringer go run golang.org/x/tools/cmd/stringer@latest
//go:generate stringer -type Attribute -linecomment
//go:generate stringer -type ValueKind

// Attribute is a type for enumerating the valid CPE attributes.
type Attribute int

Expand Down
70 changes: 23 additions & 47 deletions toolkit/types/cpe/wfn_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package cpe

import (
"bufio"
"compress/gzip"
"encoding/xml"
"os"
"strings"
"testing"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -406,62 +407,37 @@ func TestUnbinding(t *testing.T) {
}

func TestDictionary(t *testing.T) {
const fmt = "line #%02d:\nin:\t%+q\ngot:\t%q\nwant:\t%q"
t.Parallel()
const fmt = "in: %+q\ngot:\t%q\nwant:\t%q"
f, err := os.Open("testdata/official-cpe-dictionary_v2.3.xml.gz")
f, err := os.Open("testdata/dictionary.list.gz")
if err != nil {
t.Fatal(err)
}
defer f.Close()
gz, err := gzip.NewReader(f)
gz, err := gzip.NewReader(bufio.NewReader(f))
if err != nil {
t.Fatal(err)
}
defer gz.Close()

var l xmlCPEList
if err := xml.NewDecoder(gz).Decode(&l); err != nil {
t.Error(err)
}
for _, i := range l.Items {
n := i.Name
wfn, err := UnbindURI(n)
if err != nil {
t.Fatal(err)
}
got, want := wfn.BindFS(), i.Item.Name
if got != want {
t.Logf(fmt, n, got, want)
t.Logf("wfn: %#v", wfn)
t.FailNow()
}

n = i.Item.Name
wfn, err = UnbindFS(n)
if err != nil {
t.Fatal(err)
}
got, want = wfn.BindFS(), n
if got != want {
t.Logf(fmt, n, got, want)
t.Logf("wfn: %#v", wfn)
t.FailNow()
s := bufio.NewScanner(gz)
for i := 1; s.Scan(); i++ {
fs := strings.Split(s.Text(), "\t")
want := fs[1]
for _, in := range fs {
wfn, err := Unbind(in)
if err != nil {
t.Fatal(err)
t.Fatalf("%v: %#q", err, in)
}
if got := wfn.BindFS(); got != want {
t.Logf(fmt, i, in, got, want)
t.Logf("wfn: %#v", wfn)
t.Fail()
}
}
}
}

type xmlCPEList struct {
XMLName xml.Name `xml:"cpe-list"`
Items []xmlCPEItem `xml:"cpe-item"`
}

type xmlCPEItem struct {
XMLName xml.Name `xml:"cpe-item"`
Name string `xml:"name,attr"`
Item xmlCPE23Item `xml:"cpe23-item"`
}

type xmlCPE23Item struct {
XMLName xml.Name `xml:"cpe23-item"`
Name string `xml:"name,attr"`
if err := s.Err(); err != nil {
t.Error(err)
}
}

0 comments on commit 483f858

Please sign in to comment.