Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Possible memory leak from calling functions #257

Open
myaaaaaaaaa opened this issue Jun 24, 2024 · 3 comments
Open

Possible memory leak from calling functions #257

myaaaaaaaaa opened this issue Jun 24, 2024 · 3 comments

Comments

@myaaaaaaaaa
Copy link

Running the following script will result in unbounded memory growth:

gojq -n 'repeat("test") | select(false)'

However, manually inlining the body of select() will avoid triggering this issue:

gojq -n 'repeat("test") | if false then . else empty end'
@myaaaaaaaaa
Copy link
Author

myaaaaaaaaa commented Jun 25, 2024

A more easily reproducible test case below:

package main_test

import (
	"runtime"
	"strings"
	"testing"

	"github.com/itchyny/gojq"
)

func TestMemoryLeak(t *testing.T) {
	// These should output 10 values.
	testCases := []string{
		`range(50000) | select((.%5000) == 0)`,
		`range(50000) | if ((.%5000) == 0) then . else empty end`,
	}
	for _, tc := range testCases {
		t.Run(strings.ReplaceAll(tc, " ", ""), func(t *testing.T) {
			query, err := gojq.Parse(tc)
			if err != nil {
				t.Fatal(err)
			}
			iter := query.Run(nil)
			memUsage := [10]float32{}
			for i := range memUsage {
				var memStats runtime.MemStats
				runtime.GC()
				runtime.ReadMemStats(&memStats)
				memUsage[i] = float32(memStats.HeapAlloc)
				if _, ok := iter.Next(); !ok {
					t.Fatal("unexpected end of iterator")
				}
			}
			if _, ok := iter.Next(); ok {
				t.Fatal("unexpected extra result from iterator")
			}
			// Ignore the first and last values (likely outliers) just in case.
			// This formula may need adjusting.
			if memUsage[1]*1.2 < memUsage[8] { // if true {
				t.Error("Possible memory leak detected:")
				for _, m := range memUsage {
					t.Errorf("%.1f kB", m/1024)
				}
			}
		})
	}
}

outputs:

=== RUN   TestMemoryLeak
=== RUN   TestMemoryLeak/range(50000)|select((.%5000)==0)
    hello_test.go:40: Possible memory leak detected:
    hello_test.go:42: 247.1 kB
    hello_test.go:42: 247.2 kB
    hello_test.go:42: 623.4 kB
    hello_test.go:42: 1001.7 kB
    hello_test.go:42: 1603.9 kB
    hello_test.go:42: 1758.2 kB
    hello_test.go:42: 1912.4 kB
    hello_test.go:42: 2962.7 kB
    hello_test.go:42: 3116.9 kB
    hello_test.go:42: 3271.2 kB
=== RUN   TestMemoryLeak/range(50000)|if((.%5000)==0)then.elseemptyend
--- FAIL: TestMemoryLeak (0.06s)
    --- FAIL: TestMemoryLeak/range(50000)|select((.%5000)==0) (0.04s)
    --- PASS: TestMemoryLeak/range(50000)|if((.%5000)==0)then.elseemptyend (0.02s)
FAIL
exit status 1
FAIL	hello	0.063s

@wader
Copy link
Contributor

wader commented Jun 25, 2024

Hey! haven't looked closer but it sounds similar to some digging i did for #86

@myaaaaaaaaa
Copy link
Author

myaaaaaaaaa commented Jul 5, 2024

Thanks for the pointers! pprof highlights these lines in env.Next() for me:

      .    222:	case opcallrec:
      .    223:		pc, callpc, index = code.v.(int), -1, env.scopes.index
      .    224:		goto loop
      .    225:	case oppushpc:
63.50MB    226:		env.push([2]int{code.v.(int), env.scopes.index})
      .    227:	case opcallpc:
      .    228:		xs := env.pop().([2]int)
      .    229:		pc, callpc, index = xs[0], pc, xs[1]
      .    230:		goto loop
      .    231:	case opscope:
      .    232:		xs := code.v.([3]int)
      .    233:		var saveindex, outerindex int
      .    234:		if index == env.scopes.index {
      .    235:			if callpc >= 0 {
      .    236:				saveindex = index
      .    237:			} else {
      .    238:				callpc, saveindex = env.popscope()
      .    239:			}
      .    240:		} else {
      .    241:			saveindex, _ = env.scopes.save()
      .    242:			env.scopes.index = index
      .    243:		}
      .    244:		if outerindex = index; outerindex >= 0 {
      .    245:			if s := env.scopes.data[outerindex].value; s.id == xs[0] {
      .    246:				outerindex = s.outerindex
      .    247:			}
      .    248:		}
      .    249:		env.scopes.push(scope{xs[0], env.offset, callpc, saveindex, outerindex})
      .    250:		env.offset += xs[1]
      .    251:		if env.offset > len(env.values) {
319.82MB   252:			vs := make([]any, env.offset*2)
      .    253:			copy(vs, env.values)
      .    254:			env.values = vs
      .    255:		}

Benchmark code for reproduction:

package main_test

import (
	"strings"
	"testing"

	"github.com/itchyny/gojq"
)

func BenchmarkSelect(b *testing.B) {
	benchCases := []string{
		`range(.) | select(false)`,
		`range(.) | if (false) then . else empty end`,
	}

	for _, bc := range benchCases {
		query, err := gojq.Parse(bc)
		if err != nil {
			b.Fatal(err)
		}

		b.Run(strings.ReplaceAll(bc, " ", ""), func(b *testing.B) {
			iter := query.Run(b.N)
			for {
				_, ok := iter.Next()
				if !ok {
					break
				}
			}
		})
	}
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants