Skip to content

Commit

Permalink
memory cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
gosom committed Oct 20, 2024
1 parent 0940c3a commit 153bd5a
Show file tree
Hide file tree
Showing 15 changed files with 177 additions and 25 deletions.
31 changes: 21 additions & 10 deletions adapters/fetchers/jshttp/jshttp.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@ func New(headless, disableImages bool, rotator scrapemate.ProxyRotator) (scrapem

const poolSize = 10

pw, err := playwright.Run()
if err != nil {
return nil, err
}

ans := jsFetch{
pw: pw,
headless: headless,
disableImages: disableImages,
pool: make(chan *browser, poolSize),
Expand All @@ -27,6 +33,7 @@ func New(headless, disableImages bool, rotator scrapemate.ProxyRotator) (scrapem
}

type jsFetch struct {
pw *playwright.Playwright
headless bool
disableImages bool
pool chan *browser
Expand All @@ -40,7 +47,7 @@ func (o *jsFetch) GetBrowser(ctx context.Context) (*browser, error) {
case ans := <-o.pool:
return ans, nil
default:
ans, err := newBrowser(o.headless, o.disableImages, o.rotator)
ans, err := newBrowser(o.pw, o.headless, o.disableImages, o.rotator)
if err != nil {
return nil, err
}
Expand All @@ -49,6 +56,18 @@ func (o *jsFetch) GetBrowser(ctx context.Context) (*browser, error) {
}
}

// Close shuts the fetcher down: it closes the pool channel, closes every
// browser that is still idle in the pool, and then stops the Playwright
// instance held by the fetcher.
//
// The error reported by stopping Playwright is returned to the caller instead
// of being discarded. Browsers that have been handed out and not put back are
// not in the pool and therefore are not closed here.
//
// Close must be called at most once: closing the pool channel a second time
// panics.
// NOTE(review): PutBrowser presumably sends to o.pool; a send after Close would
// panic on the closed channel — confirm all users have finished before Close.
func (o *jsFetch) Close() error {
	// Closing the channel first lets the range below terminate once the
	// buffered (idle) browsers have been drained.
	close(o.pool)

	for b := range o.pool {
		b.Close()
	}

	return o.pw.Stop()
}

func (o *jsFetch) PutBrowser(ctx context.Context, b *browser) {
select {
case <-ctx.Done():
Expand Down Expand Up @@ -100,23 +119,16 @@ func (o *jsFetch) Fetch(ctx context.Context, job scrapemate.IJob) scrapemate.Res
}

// browser bundles a playwright.Browser with its playwright.BrowserContext so
// the pair can be kept in the fetcher's pool and closed as one unit.
type browser struct {
// pw is a handle to the Playwright driver.
// NOTE(review): in this diff rendering the field looks like a line the commit
// removes (the driver handle is also stored on jsFetch) — confirm before use.
pw *playwright.Playwright
// browser is the Playwright browser instance.
browser playwright.Browser
// ctx is the browser context paired with browser.
ctx playwright.BrowserContext
}

// Close releases what this browser owns: its browser context first, then the
// browser itself.
//
// It does not stop the Playwright driver. The driver is created once by the
// fetcher and passed to every browser, so stopping it here would tear it down
// for all other browsers; the fetcher's own Close stops it.
func (o *browser) Close() {
	// Best-effort teardown: Close has no error return, so failures while
	// closing the context or the browser are deliberately ignored.
	_ = o.ctx.Close()
	_ = o.browser.Close()
}

func newBrowser(headless, disableImages bool, rotator scrapemate.ProxyRotator) (*browser, error) {
pw, err := playwright.Run()
if err != nil {
return nil, err
}

func newBrowser(pw *playwright.Playwright, headless, disableImages bool, rotator scrapemate.ProxyRotator) (*browser, error) {
opts := playwright.BrowserTypeLaunchOptions{
Headless: playwright.Bool(headless),
Args: []string{
Expand Down Expand Up @@ -173,7 +185,6 @@ func newBrowser(headless, disableImages bool, rotator scrapemate.ProxyRotator) (
}

ans := browser{
pw: pw,
browser: br,
ctx: bctx,
}
Expand Down
4 changes: 4 additions & 0 deletions adapters/fetchers/nethttp/nethttp.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ type httpFetch struct {
netClient HTTPClient
}

// Close is a no-op that always returns nil.
// NOTE(review): presumably present so httpFetch offers the same Close method as
// the browser-based fetcher, which does hold resources — confirm against the
// fetcher interface definition.
func (o *httpFetch) Close() error {
return nil
}

func (o *httpFetch) Fetch(ctx context.Context, job scrapemate.IJob) scrapemate.Response {
u := job.GetFullURL()
reqBody := getBuffer()
Expand Down
3 changes: 3 additions & 0 deletions examples/books-to-scrape-simple/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,11 @@ func run() error {
}
}()

defer mate.Close()

err = mate.Start()
<-resultsDone

return err
}

Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ require (
go-simpler.org/sloglint v0.7.2 // indirect
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/automaxprocs v1.5.3 // indirect
go.uber.org/mock v0.5.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.24.0 // indirect
golang.org/x/crypto v0.28.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,8 @@ go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/automaxprocs v1.5.3 h1:kWazyxZUrS3Gs4qUpbwo5kEIMGe/DAvi5Z4tl2NW4j8=
go.uber.org/automaxprocs v1.5.3/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnwa1WM0=
go.uber.org/mock v0.5.0 h1:KAMbZvZPyBPWgD14IrIQ38QCyjwpvVVV6K/bHl1IwQU=
go.uber.org/mock v0.5.0/go.mod h1:ge71pBPLYDk7QIi1LupWxdAykm7KIEFchiOqd6z7qMM=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60=
Expand Down
11 changes: 8 additions & 3 deletions mock/mock_cacher.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 21 additions & 2 deletions mock/mock_http_fetcher.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 9 additions & 4 deletions mock/mock_parser.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 8 additions & 3 deletions mock/mock_provider.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

84 changes: 84 additions & 0 deletions mock/mock_proxy_rotator.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 153bd5a

Please sign in to comment.