Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(python): skip dev group's deps for poetry #8106

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 2 additions & 13 deletions pkg/dependency/parser/python/poetry/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ package poetry

import (
"sort"
"strings"

"github.com/BurntSushi/toml"
"golang.org/x/xerrors"

version "github.com/aquasecurity/go-pep440-version"
"github.com/aquasecurity/trivy/pkg/dependency"
"github.com/aquasecurity/trivy/pkg/dependency/parser/python"
ftypes "github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/log"
xio "github.com/aquasecurity/trivy/pkg/x/io"
Expand Down Expand Up @@ -105,7 +105,7 @@ func (p *Parser) parseDependencies(deps map[string]any, pkgVersions map[string][
}

func (p *Parser) parseDependency(name string, versRange any, pkgVersions map[string][]string) (string, error) {
name = NormalizePkgName(name)
name = python.NormalizePkgName(name)
vers, ok := pkgVersions[name]
if !ok {
return "", xerrors.Errorf("no version found for %q", name)
Expand Down Expand Up @@ -149,17 +149,6 @@ func matchVersion(currentVersion, constraint string) (bool, error) {
return c.Check(v), nil
}

// NormalizePkgName normalizes the package name based on pep-0426
func NormalizePkgName(name string) string {
// The package names don't use `_`, `.` or upper case, but dependency names can contain them.
// We need to normalize those names.
// cf. https://peps.python.org/pep-0426/#name
name = strings.ToLower(name) // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L819
name = strings.ReplaceAll(name, "_", "-") // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L50
name = strings.ReplaceAll(name, ".", "-") // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L816
return name
}

func packageID(name, ver string) string {
return dependency.ID(ftypes.Poetry, name, ver)
}
12 changes: 6 additions & 6 deletions pkg/dependency/parser/python/poetry/parse_testcase.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package poetry
import ftypes "github.com/aquasecurity/trivy/pkg/fanal/types"

var (
// docker run --name pipenv --rm -it python@sha256:e1141f10176d74d1a0e87a7c0a0a5a98dd98ec5ac12ce867768f40c6feae2fd9 sh
// docker run --name poetry --rm -it python@sha256:e1141f10176d74d1a0e87a7c0a0a5a98dd98ec5ac12ce867768f40c6feae2fd9 sh
// apk add curl
// curl -sSL https://install.python-poetry.org | python3 -
// curl -sSL https://install.python-poetry.org | POETRY_VERSION=1.1.7 python3 -
// export PATH=/root/.local/bin:$PATH
// poetry new normal && cd normal
// poetry add [email protected]
Expand All @@ -14,9 +14,9 @@ var (
{ID: "[email protected]", Name: "pypi", Version: "2.1"},
}

// docker run --name pipenv --rm -it python@sha256:e1141f10176d74d1a0e87a7c0a0a5a98dd98ec5ac12ce867768f40c6feae2fd9 sh
// docker run --name poetry --rm -it python@sha256:e1141f10176d74d1a0e87a7c0a0a5a98dd98ec5ac12ce867768f40c6feae2fd9 sh
// apk add curl
// curl -sSL https://install.python-poetry.org | python3 -
// curl -sSL https://install.python-poetry.org | POETRY_VERSION=1.1.7 python3 -
// export PATH=/root/.local/bin:$PATH
// poetry new many && cd many
// curl -o poetry.lock https://raw.githubusercontent.com/python-poetry/poetry/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock
Expand Down Expand Up @@ -108,9 +108,9 @@ var (
{ID: "[email protected]", DependsOn: []string{"[email protected]"}},
}

// docker run --name pipenv --rm -it python@sha256:e1141f10176d74d1a0e87a7c0a0a5a98dd98ec5ac12ce867768f40c6feae2fd9 sh
// docker run --name poetry --rm -it python@sha256:e1141f10176d74d1a0e87a7c0a0a5a98dd98ec5ac12ce867768f40c6feae2fd9 sh
// apk add curl
// curl -sSL https://install.python-poetry.org | python3 -
// curl -sSL https://install.python-poetry.org | POETRY_VERSION=1.1.7 python3 -
// export PATH=/root/.local/bin:$PATH
// poetry new web && cd web
// poetry add [email protected]
Expand Down
30 changes: 26 additions & 4 deletions pkg/dependency/parser/python/pyproject/pyproject.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ import (
"io"

"github.com/BurntSushi/toml"
"github.com/samber/lo"
"golang.org/x/xerrors"

"github.com/aquasecurity/trivy/pkg/dependency/parser/python"
)

type PyProject struct {
Expand All @@ -16,7 +19,26 @@ type Tool struct {
}

type Poetry struct {
Dependencies map[string]any `toml:"dependencies"`
Dependencies dependencies `toml:"dependencies"`
Groups map[string]Group `toml:"group"`
}

type Group struct {
Dependencies dependencies `toml:"dependencies"`
}

type dependencies map[string]struct{}

func (d *dependencies) UnmarshalTOML(data any) error {
m, ok := data.(map[string]any)
if !ok {
return xerrors.Errorf("dependencies must be map, but got: %T", data)
}

*d = lo.MapEntries(m, func(pkgName string, _ any) (string, struct{}) {
return python.NormalizePkgName(pkgName), struct{}{}
})
return nil
}

// Parser parses pyproject.toml defined in PEP518.
Expand All @@ -28,10 +50,10 @@ func NewParser() *Parser {
return &Parser{}
}

func (p *Parser) Parse(r io.Reader) (map[string]any, error) {
func (p *Parser) Parse(r io.Reader) (PyProject, error) {
var conf PyProject
if _, err := toml.NewDecoder(r).Decode(&conf); err != nil {
return nil, xerrors.Errorf("toml decode error: %w", err)
return PyProject{}, xerrors.Errorf("toml decode error: %w", err)
}
return conf.Tool.Poetry.Dependencies, nil
return conf, nil
}
37 changes: 22 additions & 15 deletions pkg/dependency/parser/python/pyproject/pyproject_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,33 @@ func TestParser_Parse(t *testing.T) {
tests := []struct {
name string
file string
want map[string]any
want pyproject.PyProject
wantErr assert.ErrorAssertionFunc
}{
{
name: "happy path",
file: "testdata/happy.toml",
want: map[string]any{
"flask": "^1.0",
"python": "^3.9",
"requests": map[string]any{
"version": "2.28.1",
"optional": true,
},
"virtualenv": []any{
map[string]any{
"version": "^20.4.3,!=20.4.5,!=20.4.6",
},
map[string]any{
"version": "<20.16.6",
"markers": "sys_platform == 'win32' and python_version == '3.9'",
want: pyproject.PyProject{
Tool: pyproject.Tool{
Poetry: pyproject.Poetry{
Dependencies: map[string]struct{}{
"flask": {},
"python": {},
"requests": {},
"virtualenv": {},
},
Groups: map[string]pyproject.Group{
"dev": {
Dependencies: map[string]struct{}{
"pytest": {},
},
},
"lint": {
Dependencies: map[string]struct{}{
"ruff": {},
},
},
},
},
},
},
Expand Down
7 changes: 7 additions & 0 deletions pkg/dependency/parser/python/pyproject/testdata/happy.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ virtualenv = [

[tool.poetry.dev-dependencies]

[tool.poetry.group.dev.dependencies]
pytest = "8.3.4"


[tool.poetry.group.lint.dependencies]
ruff = "0.8.3"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions pkg/dependency/parser/python/python.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package python

import "strings"

// NormalizePkgName normalizes the package name based on pep-0426
func NormalizePkgName(name string) string {
// The package names don't use `_`, `.` or upper case, but dependency names can contain them.
// We need to normalize those names.
// cf. https://peps.python.org/pep-0426/#name
name = strings.ToLower(name) // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L819
name = strings.ReplaceAll(name, "_", "-") // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L50
name = strings.ReplaceAll(name, ".", "-") // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L816
return name
}
39 changes: 39 additions & 0 deletions pkg/dependency/parser/python/python_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package python_test

import (
"testing"

"github.com/stretchr/testify/assert"

"github.com/aquasecurity/trivy/pkg/dependency/parser/python"
)

func Test_NormalizePkgName(t *testing.T) {
tests := []struct {
pkgName string
expected string
}{
{
pkgName: "SecretStorage",
expected: "secretstorage",
},
{
pkgName: "pywin32-ctypes",
expected: "pywin32-ctypes",
},
{
pkgName: "jaraco.classes",
expected: "jaraco-classes",
},
{
pkgName: "green_gdk",
expected: "green-gdk",
},
}

for _, tt := range tests {
t.Run(tt.pkgName, func(t *testing.T) {
assert.Equal(t, tt.expected, python.NormalizePkgName(tt.pkgName))
})
}
}
58 changes: 46 additions & 12 deletions pkg/fanal/analyzer/language/python/poetry/poetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func (a poetryAnalyzer) parsePoetryLock(path string, r io.Reader) (*types.Applic
func (a poetryAnalyzer) mergePyProject(fsys fs.FS, dir string, app *types.Application) error {
// Parse pyproject.toml to identify the direct dependencies
path := filepath.Join(dir, types.PyProject)
p, err := a.parsePyProject(fsys, path)
project, err := a.parsePyProject(fsys, path)
if errors.Is(err, fs.ErrNotExist) {
// Assume all the packages are direct dependencies as it cannot identify them from poetry.lock
a.logger.Debug("pyproject.toml not found", log.FilePath(path))
Expand All @@ -105,34 +105,68 @@ func (a poetryAnalyzer) mergePyProject(fsys fs.FS, dir string, app *types.Applic

// Identify the direct/transitive dependencies
for i, pkg := range app.Packages {
if _, ok := p[pkg.Name]; ok {
if _, ok := project.Tool.Poetry.Dependencies[pkg.Name]; ok {
app.Packages[i].Relationship = types.RelationshipDirect
} else {
app.Packages[i].Indirect = true
app.Packages[i].Relationship = types.RelationshipIndirect
}
}

filterProdPackages(project, app)
return nil
}

func (a poetryAnalyzer) parsePyProject(fsys fs.FS, path string) (map[string]any, error) {
func filterProdPackages(project pyproject.PyProject, app *types.Application) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should flag the Dev field instead of filtering for --include-dev-deps, but if you want to keep the original behavior in this PR, we can add the change in a separate PR.

Copy link
Contributor

@DmitriyLewen DmitriyLewen Dec 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@knqyf263 as i wrote in #8080 (comment) - are you sure that we want to add dev deps under Dev field?
Previously we only did this upon request from users.

I have yet to meet a python user who needs to scan for dev dependencies

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. It sounds reasonable, but we should add a new column showing if --include-dev-deps is supported.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But if the implementation cost is not so different, it's better to mark dev dependencies.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll open another PR for this one

packages := lo.SliceToMap(app.Packages, func(pkg types.Package) (string, types.Package) {
return pkg.ID, pkg
})

visited := make(map[string]struct{})
deps := project.Tool.Poetry.Dependencies

for group, groupDeps := range project.Tool.Poetry.Groups {
if group == "dev" {
continue
}
deps = lo.Assign(deps, groupDeps.Dependencies)
}

for _, pkg := range packages {
if _, prodDep := deps[pkg.Name]; !prodDep {
continue
}
walkPackageDeps(pkg.ID, packages, visited)
}

app.Packages = lo.Filter(app.Packages, func(pkg types.Package, _ int) bool {
_, ok := visited[pkg.ID]
return ok
})
}

func walkPackageDeps(pkgID string, packages map[string]types.Package, visited map[string]struct{}) {
if _, ok := visited[pkgID]; ok {
return
}
visited[pkgID] = struct{}{}
for _, dep := range packages[pkgID].DependsOn {
walkPackageDeps(dep, packages, visited)
}
}

func (a poetryAnalyzer) parsePyProject(fsys fs.FS, path string) (pyproject.PyProject, error) {
// Parse pyproject.toml
f, err := fsys.Open(path)
if err != nil {
return nil, xerrors.Errorf("file open error: %w", err)
return pyproject.PyProject{}, xerrors.Errorf("file open error: %w", err)
}
defer f.Close()

parsed, err := a.pyprojectParser.Parse(f)
project, err := a.pyprojectParser.Parse(f)
if err != nil {
return nil, err
return pyproject.PyProject{}, err
}

// Packages from `pyproject.toml` can use uppercase characters, `.` and `_`.
parsed = lo.MapKeys(parsed, func(_ any, pkgName string) string {
return poetry.NormalizePkgName(pkgName)
})

return parsed, nil
return project, nil
}
Loading
Loading