Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Css 4824/expire audit logs #1007

Merged
merged 17 commits into from
Jul 25, 2023
5 changes: 5 additions & 0 deletions charms/jimm-k8s/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
# See LICENSE file for licensing details.

options:
audit-log-retention-period:
type: string
description: |
How long to hold audit logs for in days, i.e., 10 = 10 days.
Logs are purged at 9AM UTC. Defaults to 30 days.
ale8k marked this conversation as resolved.
Show resolved Hide resolved
candid-agent-private-key:
type: string
description: |
Expand Down
3 changes: 2 additions & 1 deletion charms/jimm-k8s/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ def _update_workload(self, event):
config_values = {
"CANDID_PUBLIC_KEY": self.config.get("candid-public-key", ""),
"CANDID_URL": self.config.get("candid-url", ""),
"JIMM_AUDIT_LOG_RETENTION_PERIOD": self.config.get("audit-log-retention-period", "30"),
ale8k marked this conversation as resolved.
Show resolved Hide resolved
"JIMM_ADMINS": self.config.get("controller-admins", ""),
"JIMM_DNS_NAME": dns_name,
"JIMM_LOG_LEVEL": self.config.get("log-level", ""),
Expand All @@ -256,7 +257,7 @@ def _update_workload(self, event):
"OPENFGA_TOKEN": self._state.openfga_token,
"OPENFGA_PORT": self._state.openfga_port,
"PRIVATE_KEY": self.config.get("private-key", ""),
"PUBLIC_KEY": self.config.get("public-key", ""),
"PUBLIC_KEY": self.config.get("public-key", "")
}
if self._state.dsn:
config_values["JIMM_DSN"] = self._state.dsn
Expand Down
5 changes: 5 additions & 0 deletions charms/jimm/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
# See LICENSE file for licensing details.

options:
audit-log-retention-period:
ale8k marked this conversation as resolved.
Show resolved Hide resolved
type: string
description: |
How long to hold audit logs for in days, i.e., 10 = 10 days.
Logs are purged at 9AM UTC. Defaults to 30 days.
ale8k marked this conversation as resolved.
Show resolved Hide resolved
candid-agent-private-key:
type: string
description: |
Expand Down
1 change: 1 addition & 0 deletions charms/jimm/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def _on_config_changed(self, _):
"dashboard_location": self.config.get("juju-dashboard-location"),
"public_key": self.config.get("public-key"),
"private_key": self.config.get("private-key"),
"audit_rentention_period": self.config.get("audit-log-retention-period", "30")
}

with open(self._env_filename(), "wt") as f:
Expand Down
1 change: 1 addition & 0 deletions charms/jimm/templates/jimm.env
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
BAKERY_AGENT_FILE={{bakery_agent_file}}
CANDID_URL={{candid_url}}
JIMM_ADMINS={{admins}}
JIMM_AUDIT_LOG_RETENTION_PERIOD={{audit_rentention_period}}
{% if dashboard_location %}
JIMM_DASHBOARD_LOCATION={{dashboard_location}}
{% endif %}
Expand Down
5 changes: 3 additions & 2 deletions cmd/jimmsrv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,9 @@ func start(ctx context.Context, s *service.Service) error {
Token: os.Getenv("OPENFGA_TOKEN"),
Port: os.Getenv("OPENFGA_PORT"),
},
PrivateKey: os.Getenv("BAKERY_PRIVATE_KEY"),
PublicKey: os.Getenv("BAKERY_PUBLIC_KEY"),
PrivateKey: os.Getenv("BAKERY_PRIVATE_KEY"),
PublicKey: os.Getenv("BAKERY_PUBLIC_KEY"),
AuditLogRetentionPeriod: os.Getenv("JIMM_AUDIT_LOG_RETENTION_PERIOD"),
})
if err != nil {
return err
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ services:
JIMM_TEST_PGXDSN: ""
JIMM_JWT_EXPIRY: 30s
JIMM_ENABLE_JWKS_ROTATOR: "1"
JIMM_AUDIT_LOG_RETENTION_PERIOD: "1"
ale8k marked this conversation as resolved.
Show resolved Hide resolved
TEST_LOGGING_CONFIG: ""
PUBLIC_KEY: "izcYsQy3TePp6bLjqOo3IRPFvkQd2IKtyODGqC6SdFk="
PRIVATE_KEY: "ly/dzsI9Nt/4JxUILQeAX79qZ4mygDiuYGqc2ZEiDEc="
Expand Down
10 changes: 10 additions & 0 deletions internal/db/audit.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,13 @@ func (d *Database) ForEachAuditLogEntry(ctx context.Context, filter AuditLogFilt
}
return nil
}

func (d *Database) CleanupAuditLogs(ctx context.Context, auditLogRetentionPeriod int) (int64, error) {
ale8k marked this conversation as resolved.
Show resolved Hide resolved
retentionDate := time.Now().AddDate(0, 0, -(auditLogRetentionPeriod))
ale8k marked this conversation as resolved.
Show resolved Hide resolved
tx := d.DB.
WithContext(ctx).
Unscoped().
Where("time < ?", retentionDate).
Delete(&dbmodel.AuditLogEntry{})
return tx.RowsAffected, tx.Error
}
47 changes: 47 additions & 0 deletions internal/db/auditlog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,50 @@ func (s *dbSuite) TestForEachAuditLogEntry(c *qt.C) {
c.Check(calls, qt.Equals, 1)
c.Check(err, qt.DeepEquals, testError)
}

func (s *dbSuite) TestCleanupAuditLogs(c *qt.C) {
ctx := context.Background()
now := time.Now()

err := s.Database.AddAuditLogEntry(ctx, &dbmodel.AuditLogEntry{
ale8k marked this conversation as resolved.
Show resolved Hide resolved
Time: now.AddDate(0, 0, -1),
})
c.Check(errors.ErrorCode(err), qt.Equals, errors.CodeUpgradeInProgress)

err = s.Database.Migrate(context.Background(), true)
c.Assert(err, qt.IsNil)

// A log from 1 day ago
c.Assert(s.Database.AddAuditLogEntry(ctx, &dbmodel.AuditLogEntry{
Time: now.AddDate(0, 0, -1),
}), qt.IsNil)

// A log from 2 days ago
c.Assert(s.Database.AddAuditLogEntry(ctx, &dbmodel.AuditLogEntry{
Time: now.AddDate(0, 0, -2),
}), qt.IsNil)

// A log from 3 days ago
c.Assert(s.Database.AddAuditLogEntry(ctx, &dbmodel.AuditLogEntry{
Time: now.AddDate(0, 0, -3),
}), qt.IsNil)

// Ensure 3 exist
logs := make([]dbmodel.AuditLogEntry, 0)
err = s.Database.DB.Find(&logs).Error
c.Assert(err, qt.IsNil)
c.Assert(logs, qt.HasLen, 3)

// Delete all 2 or more days older, leaving 1 log left
deleted, err := s.Database.CleanupAuditLogs(ctx, 2)
c.Assert(err, qt.IsNil)

// Check that 2 were infact deleted
c.Assert(deleted, qt.Equals, int64(2))

// Check only 1 remains
logs = make([]dbmodel.AuditLogEntry, 0)
err = s.Database.DB.Find(&logs).Error
c.Assert(err, qt.IsNil)
c.Assert(logs, qt.HasLen, 1)
}
73 changes: 73 additions & 0 deletions internal/jimm/audit_log.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/juju/zaputil/zapctx"
"go.uber.org/zap"

"github.com/canonical/jimm/internal/db"
"github.com/canonical/jimm/internal/dbmodel"
"github.com/canonical/jimm/internal/servermon"
)
Expand Down Expand Up @@ -127,3 +128,75 @@ func (o recorder) HandleReply(r rpc.Request, header *rpc.Header, body interface{
servermon.WebsocketRequestDuration.WithLabelValues(r.Type, r.Action).Observe(float64(d) / float64(time.Second))
return o.logger.LogResponse(r, header, body)
}

// AuditLogCleanupService is a service capable of cleaning up audit logs
// on a defined retention period. The retention period is in DAYS.
type auditLogCleanupService struct {
ctx context.Context
ale8k marked this conversation as resolved.
Show resolved Hide resolved
auditLogRetentionPeriod int
db db.Database
}

// pollTimeOfDay holds the time hour, minutes and seconds to poll at.
type pollTimeOfDay struct {
Hours int
Minutes int
Seconds int
}

var pollDuration = pollTimeOfDay{
Hours: 9,
}

// NewAuditLogCleanupService returns a service capable of cleaning up audit logs
// on a defined retention period. The retention period is in DAYS.
func NewAuditLogCleanupService(ctx context.Context, db db.Database, auditLogRetentionPeriod int) *auditLogCleanupService {
return &auditLogCleanupService{
ctx: ctx,
auditLogRetentionPeriod: auditLogRetentionPeriod,
ale8k marked this conversation as resolved.
Show resolved Hide resolved
db: db,
}
}

// Start begins a routine which checks daily for any logs
ale8k marked this conversation as resolved.
Show resolved Hide resolved
// needed to be cleaned up.
func (a *auditLogCleanupService) Start() {
go a.poll()
}

// calculateNextPollDuration returns the next duration to poll on.
// We recalculate each time and not rely on running every 24 hours
// for absolute consistency within ns apart.
func (a *auditLogCleanupService) calculateNextPollDuration() time.Duration {
ale8k marked this conversation as resolved.
Show resolved Hide resolved
now := time.Now().UTC()
midDayUTC := time.Date(
ale8k marked this conversation as resolved.
Show resolved Hide resolved
now.Year(),
now.Month(),
now.Day(),
pollDuration.Hours,
pollDuration.Minutes,
pollDuration.Seconds,
0,
now.Location(),
)
return midDayUTC.Sub(now)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A little bit confused by this function, we take the current year,month,day and the polling hours,minutes, seconds then we subtract the current time. I'll just think on this some more

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good spot, negative hours will break this. It needs solving. Will update comment.

}

// poll is designed to be run in a routine where it can be cancelled safely
// from the service's context. It calculates the poll duration at 9am each day
// UTC.
func (a *auditLogCleanupService) poll() {
for {
select {
case <-time.After(a.calculateNextPollDuration()):
deleted, err := a.db.CleanupAuditLogs(a.ctx, a.auditLogRetentionPeriod)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think it would be very useful for us to monitor the duration of cleanup each day.. could you please add metrics for this and the number of deleted audit entries?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do! Will update this comment when done

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added histogram and haven't added total number of deleted logs, because they're gone anyway?

if err != nil {
zapctx.Error(a.ctx, "failed to cleanup audit logs", zap.Error(err))
continue
}
zapctx.Debug(a.ctx, "audit log cleanup run successfully", zap.Int64("count", deleted))
case <-a.ctx.Done():
return
ale8k marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
68 changes: 68 additions & 0 deletions internal/jimm/audit_log_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright 2023 Canonical Ltd.

package jimm_test

import (
"context"
"testing"
"time"

"github.com/CanonicalLtd/jimm/internal/db"
ale8k marked this conversation as resolved.
Show resolved Hide resolved
"github.com/CanonicalLtd/jimm/internal/dbmodel"
"github.com/CanonicalLtd/jimm/internal/errors"
"github.com/CanonicalLtd/jimm/internal/jimm"
"github.com/CanonicalLtd/jimm/internal/jimmtest"
qt "github.com/frankban/quicktest"
ale8k marked this conversation as resolved.
Show resolved Hide resolved
)

func TestAuditLogCleanupServicePurgesLogs(t *testing.T) {
c := qt.New(t)

ctx := context.Background()
now := time.Now().UTC().Round(time.Millisecond)

db := db.Database{
DB: jimmtest.MemoryDB(c, func() time.Time { return now }),
}

err := db.AddAuditLogEntry(ctx, &dbmodel.AuditLogEntry{
Time: now.AddDate(0, 0, -1),
})
c.Check(errors.ErrorCode(err), qt.Equals, errors.CodeUpgradeInProgress)

err = db.Migrate(context.Background(), true)
c.Assert(err, qt.IsNil)

// A log from 1 day ago
c.Assert(db.AddAuditLogEntry(ctx, &dbmodel.AuditLogEntry{
Time: now.AddDate(0, 0, -1),
}), qt.IsNil)

// A log from 2 days ago
c.Assert(db.AddAuditLogEntry(ctx, &dbmodel.AuditLogEntry{
Time: now.AddDate(0, 0, -2),
}), qt.IsNil)

// A log from 3 days ago
c.Assert(db.AddAuditLogEntry(ctx, &dbmodel.AuditLogEntry{
Time: now.AddDate(0, 0, -3),
}), qt.IsNil)

// Check 3 created
logs := make([]dbmodel.AuditLogEntry, 0)
err = db.DB.Find(&logs).Error
c.Assert(err, qt.IsNil)
c.Assert(logs, qt.HasLen, 3)

jimm.PollDuration.Hours = now.Hour()
jimm.PollDuration.Minutes = now.Minute()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Setting these values isn't doing anything since we don't use them in the function.

jimm.PollDuration.Seconds = now.Second() + 2
svc := jimm.NewAuditLogCleanupService(ctx, db, 1)
svc.Start()

// Check 2 were purged
logs = make([]dbmodel.AuditLogEntry, 0)
err = db.DB.Find(&logs).Error
c.Assert(err, qt.IsNil)
c.Assert(logs, qt.HasLen, 3)
}
1 change: 1 addition & 0 deletions internal/jimm/export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

var (
DetermineAccessLevelAfterGrant = determineAccessLevelAfterGrant
PollDuration = pollDuration
)

func (w *Watcher) PollControllerModels(ctx context.Context, ctl *dbmodel.Controller) {
Expand Down
11 changes: 11 additions & 0 deletions service.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"io"
"net/http"
"os"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -150,6 +151,10 @@ type Params struct {

// PublicKey holds the public part of the bakery keypair.
PublicKey string

// AuditLogRetentionPeriod is the amount of days detailing how long
ale8k marked this conversation as resolved.
Show resolved Hide resolved
// to keep an audit log for before purging it from the database.
AuditLogRetentionPeriod string
}

// A Service is the implementation of a JIMM server.
Expand Down Expand Up @@ -255,6 +260,12 @@ func NewService(ctx context.Context, p Params) (*Service, error) {
return nil, errors.E(op, err)
}

period, err := strconv.Atoi(p.AuditLogRetentionPeriod)
if err != nil {
return nil, errors.E(op, "failed to parse audit log retention period")
}
jimm.NewAuditLogCleanupService(ctx, s.jimm.Database, period).Start()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmmm.. could we then just have a single method like StartAuditLogCleanupService instead of New().Start()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer creation of service then start honestly, it's a nice separation of concerns, constructor vs behaviour


openFGAclient, err := newOpenFGAClient(ctx, p)
if err != nil {
return nil, errors.E(op, err)
Expand Down