Skip to content

Commit

Permalink
New config options
Browse files Browse the repository at this point in the history
  • Loading branch information
Maya Sergeeva committed Dec 13, 2022
1 parent 8d00cb7 commit 0446a10
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 24 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM golang:1.14-buster as build-env
FROM golang:1.17-buster as build-env

WORKDIR /app
COPY . /app
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
IMAGE_NAME = spacetabio/prerender-go
IMAGE_VERSION = v1.0.0
IMAGE_VERSION = v1.1.2

deps:
go mod vendor
Expand Down
8 changes: 6 additions & 2 deletions configuration/defaults/prerender.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ defaults: # stage name
type: all # sitemaps, urls, all
sitemaps: [] # sitemap urls
urls: [] # additional urls to parse
get_params_to_save: [] # get params to preserve in file name
get_params_to_save: [ ] # get params to preserve in file name
wait_for: time # console, element, time - page ready lookup strategy configuration
console_string: "" # string logged to the console once DOM loading has finished and the page is ready to be read
element:
Expand All @@ -19,7 +19,11 @@ defaults: # stage name
attribute:
name: ""
value: ""
sleep_time: 3 # sleep time in sec
max_attempts: 5
sleep_time: 3s
wait_timeout: 30s
render_period: 7h
viewport:
width: 1680
height: 10000
page_404_text: "Такой страницы у нас нет"
4 changes: 4 additions & 0 deletions configuration/prerender.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,13 @@ type PrerenderConfig struct {
Lookup lookupConfig `yaml:"lookup"`
WaitFor string `yaml:"wait_for"`
ConsoleString string `yaml:"console_string"`
MaxAttempts int `yaml:"max_attempts"`
SleepTime time.Duration `yaml:"sleep_time"`
WaitTimeout time.Duration `yaml:"wait_timeout"`
RenderPeriod time.Duration `yaml:"render_period"`
Element ElementConfig `yaml:"element"`
Viewport viewportConfig `yaml:"viewport"`
Page404Text string `yaml:"page_404_text"`
}

func (ec ElementConfig) GetWaitElement() string {
Expand Down
12 changes: 10 additions & 2 deletions pkg/service/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,18 @@ import (
)

// NewService builds a Service from the repository r and the prerender and
// storage configuration.
//
// The service's lastRenderedAt is initialised to the current time minus
// prerenderConfig.RenderPeriod.
func NewService(r Repository, prerenderConfig cfg.PrerenderConfig, storageConfig cfg.StorageConfig) Service {
return &service{r, prerenderConfig, storageConfig}
// Back-date the "last rendered" moment by one render period.
lr := time.Now().Add(-prerenderConfig.RenderPeriod)

return &service{
lastRenderedAt: &lr,
r: r,
prerenderConfig: prerenderConfig,
storageConfig: storageConfig,
}
}

type service struct {
lastRenderedAt *time.Time
r Repository
prerenderConfig cfg.PrerenderConfig
storageConfig cfg.StorageConfig
Expand Down Expand Up @@ -94,7 +102,7 @@ type Service interface {

GetPageBody(ctx context.Context, p *models.PageData) error
RenderPages(pages []*models.PageData, maxWorkers int) error
RenderPage(ctx context.Context, page *models.PageData, num int) error
RenderPage(ctx context.Context, page *models.PageData, num int, total int) error

renderBodyWithElementTrigger(ctx context.Context, p *models.PageData) (string, error)
renderBodyWithTimeTrigger(ctx context.Context, p *models.PageData) (string, error)
Expand Down
30 changes: 25 additions & 5 deletions pkg/service/links.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"net/url"
"strings"
"time"

"github.com/yterajima/go-sitemap"

Expand Down Expand Up @@ -100,18 +101,37 @@ func (s *service) GetUrlsFromLinksList() ([]string, error) {
func (s *service) GetUrlsFromSitemaps() ([]string, error) {
links := make([]string, 0)

for _, url := range s.prerenderConfig.Lookup.SitemapURLs {
smap, err := sitemap.Get(url, nil)
for _, sitemapsURL := range s.prerenderConfig.Lookup.SitemapURLs {
smap, err := sitemap.Get(sitemapsURL, nil)
if err != nil {
return nil, err
}

for _, URL := range smap.URL {
if !IsInSlice(links, URL.Loc) {
links = append(links, URL.Loc)
for _, sitemapURL := range smap.URL {
if sitemapURL.LastMod == "" {
continue

}

lm, err := lastModifiedFrom(sitemapURL.LastMod)
if err != nil {
return nil, err
}

if s.lastRenderedAt.After(lm) {
continue
}

if !IsInSlice(links, sitemapURL.Loc) {
links = append(links, sitemapURL.Loc)
}
}
}

return links, nil
}

// lastModifiedFrom parses a sitemap <lastmod> value into a time.Time.
//
// The sitemap protocol specifies W3C Datetime, which allows reduced
// precision, so besides the full RFC 3339 form (2021-09-21T07:31:56+00:00)
// the following are accepted: minutes without seconds
// (2021-09-21T07:31+00:00), a plain date (2021-09-21), year and month
// (2021-09) and year only (2021). Forms without a zone are interpreted as
// UTC. Surrounding whitespace is ignored.
//
// When no layout matches, the zero time and the RFC 3339 parse error are
// returned.
func lastModifiedFrom(lastMod string) (time.Time, error) {
	lastMod = strings.TrimSpace(lastMod)

	// 2021-09-21T07:31:56+00:00
	parsed, err := time.Parse(time.RFC3339, lastMod)
	if err == nil {
		return parsed, nil
	}

	// Reduced-precision W3C Datetime forms, most specific first.
	for _, layout := range [...]string{
		"2006-01-02T15:04Z07:00",
		"2006-01-02",
		"2006-01",
		"2006",
	} {
		if alt, altErr := time.Parse(layout, lastMod); altErr == nil {
			return alt, nil
		}
	}

	return time.Time{}, err
}
40 changes: 27 additions & 13 deletions pkg/service/page.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ import (
"errors"
"fmt"
"log"
//"sync"
"time"
"strings"

"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/emulation"
Expand All @@ -17,14 +16,17 @@ import (
"github.com/spacetab-io/prerender-go/pkg/models"
)

func (s service) GetPageBody(ctx context.Context, p *models.PageData) (err error) {
timeoutCtx, cancel := context.WithTimeout(ctx, 30*time.Second) //nolint:gomnd
func (s service) GetPageBody(ctx context.Context, p *models.PageData) error {
timeoutCtx, cancel := context.WithTimeout(ctx, s.prerenderConfig.WaitTimeout)
defer cancel()

newTabCtx, cancelNewTabCtx := chromedp.NewContext(timeoutCtx) // create new tab
defer cancelNewTabCtx()

var body string
var (
body string
err error
)

switch s.prerenderConfig.WaitFor {
case models.WaitForConsole:
Expand All @@ -45,6 +47,11 @@ func (s service) GetPageBody(ctx context.Context, p *models.PageData) (err error
p.ContentLength = len(body)
p.Status = 200 //TODO убрать хардкод

// Mark the page as "not found" when the rendered body contains the configured
// 404 marker text. This must be a substring match (strings.Contains):
// strings.ContainsAny reports true when the body shares any single character
// with the marker (a space is enough), which would flag nearly every page.
if marker := s.prerenderConfig.Page404Text; marker != "" && strings.Contains(body, marker) {
	p.Status = 404
}

return nil
}

Expand All @@ -65,7 +72,7 @@ func (s service) renderBodyWithTimeTrigger(ctx context.Context, p *models.PageDa
err := chromedp.Run(ctx,
chromedp.Navigate(p.URL.String()),
emulation.SetDeviceMetricsOverride(s.prerenderConfig.Viewport.Width, s.prerenderConfig.Viewport.Height, 1.0, false),
chromedp.Sleep(s.prerenderConfig.SleepTime*time.Second),
chromedp.Sleep(s.prerenderConfig.SleepTime),
chromedp.OuterHTML("html", &body),
)

Expand Down Expand Up @@ -128,6 +135,7 @@ func (s *service) RenderPages(pages []*models.PageData, maxWorkers int) error {
}

sem := semaphore.NewWeighted(int64(maxWorkers))
total := len(pages)

for i, page := range pages {
// When maxWorkers goroutines are in flight, Acquire blocks until one of the
Expand All @@ -143,11 +151,17 @@ func (s *service) RenderPages(pages []*models.PageData, maxWorkers int) error {
go func() {
defer sem.Release(1)

if err := s.RenderPage(actxt, p, num); err != nil {
if err := s.RenderPage(actxt, p, num, total); err != nil {
log.Println(err)
return
}

if p.Status != 200 {
log.Printf("page http status is not 200. skip!")

return
}

p.SuccessRender = true

if err := s.r.SaveData(ctx, p); err != nil {
Expand All @@ -164,27 +178,27 @@ func (s *service) RenderPages(pages []*models.PageData, maxWorkers int) error {
return sem.Acquire(ctx, int64(maxWorkers))
}

func (s *service) RenderPage(ctx context.Context, page *models.PageData, num int) error {
func (s *service) RenderPage(ctx context.Context, page *models.PageData, num, total int) error {
if page == nil {
return errors.New("page data is nil")
}

page.Attempts++
if page.Attempts == 5 { //nolint:gomnd
if page.Attempts == s.prerenderConfig.MaxAttempts {
return fmt.Errorf("render page `%s` attempts exceeded", page.URL.String())
}

const logStatusFormat = "| %04d | %s | %d | %s"
const logStatusFormat = "| %04d/%04d | %s | %d | %s"

err := s.GetPageBody(ctx, page)
if err != nil {
log.Printf(logStatusFormat, num, "x", page.Attempts, page.URL.String())
log.Printf(logStatusFormat, num, total, "x", page.Attempts, fmt.Sprintf("%s\n%s", page.URL.String(), err.Error()))

// next attempt
return s.RenderPage(ctx, page, num)
return s.RenderPage(ctx, page, num, total)
}

log.Printf(logStatusFormat, num, "v", page.Attempts, page.URL.String())
log.Printf(logStatusFormat, num, total, "v", page.Attempts, page.URL.String())

return err
}

0 comments on commit 0446a10

Please sign in to comment.