From 18e5fb347dbb4484bbbf181c530f343c4e385878 Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Wed, 3 Jul 2024 09:07:14 -0700 Subject: [PATCH] Adding experimental flag for Metrics Labels String Interning (#6057) * Metrics Labels String Interning Signed-off-by: alanprot * Changelog Signed-off-by: alanprot * v1 guarantees Signed-off-by: alanprot --------- Signed-off-by: alanprot --- CHANGELOG.md | 1 + docs/configuration/config-file-reference.md | 4 ++ docs/configuration/v1-guarantees.md | 2 + pkg/ingester/ingester.go | 23 +++++- pkg/ingester/lifecycle_test.go | 1 + pkg/util/strings.go | 78 +++++++++++++++++++++ 6 files changed, 107 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3203db0790..6c03a76983 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ * [ENHANCEMENT] Store Gateway: Log gRPC requests together with headers configured in `http_request_headers_to_log`. #5958 * [ENHANCEMENT] Upgrade Alpine to 3.19. #6014 * [ENHANCEMENT] Upgrade go to 1.21.11 #6014 +* [ENHANCEMENT] Ingester: Add a new experimental `-ingester.labels-string-interning-enabled` flag to enable string interning for metrics labels. #6057 * [BUGFIX] Configsdb: Fix endline issue in db password. #5920 * [BUGFIX] Ingester: Fix `user` and `type` labels for the `cortex_ingester_tsdb_head_samples_appended_total` TSDB metric. #5952 * [BUGFIX] Querier: Enforce max query length check for `/api/v1/series` API even though `ignoreMaxQueryLength` is set to true. #6018 diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index ab0570c67e..6bea83aa01 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -2971,6 +2971,10 @@ instance_limits: # Customize the message contained in limit errors # CLI flag: -ingester.admin-limit-message [admin_limit_message: | default = "please contact administrator to raise it"] + +# Experimental: Enable string interning for metrics labels. +# CLI flag: -ingester.labels-string-interning-enabled +[labels_string_interning_enabled: | default = false] ``` ### `ingester_client_config` diff --git a/docs/configuration/v1-guarantees.md b/docs/configuration/v1-guarantees.md index 919c0033a8..fd6bbe6641 100644 --- a/docs/configuration/v1-guarantees.md +++ b/docs/configuration/v1-guarantees.md @@ -113,3 +113,5 @@ Currently experimental features are: - `-ruler.ring.tokens-file-path` (path) CLI flag - Native Histograms - Ingestion can be enabled by setting `-blocks-storage.tsdb.enable-native-histograms=true` on Ingester. +- String interning for metrics labels + - Enable string interning for metrics labels by setting `-ingester.labels-string-interning-enabled` on Ingester. diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index e067341049..3006dd7969 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -134,6 +134,8 @@ type Config struct { // For admin contact details AdminLimitMessage string `yaml:"admin_limit_message"` + + LabelsStringInterningEnabled bool `yaml:"labels_string_interning_enabled"` } // RegisterFlags adds the flags required to config this to the given FlagSet @@ -158,6 +160,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.StringVar(&cfg.AdminLimitMessage, "ingester.admin-limit-message", "please contact administrator to raise it", "Customize the message contained in limit errors") + f.BoolVar(&cfg.LabelsStringInterningEnabled, "ingester.labels-string-interning-enabled", false, "Experimental: Enable string interning for metrics labels.") } func (cfg *Config) Validate() error { @@ -165,6 +168,10 @@ func (cfg *Config) Validate() error { return err } + if cfg.LabelsStringInterningEnabled { + logutil.WarnExperimentalUse("String interning for metrics labels Enabled") + } + return nil } @@ -296,6 +303,10 @@ type userTSDB struct { // Cached shipped blocks. shippedBlocksMtx sync.Mutex shippedBlocks map[ulid.ULID]struct{} + + // Used to dedup strings and keep a single reference in memory + labelsStringInterningEnabled bool + interner util.Interner } // Explicitly wrapping the tsdb.DB functions that we use. @@ -425,6 +436,9 @@ func (u *userTSDB) PostCreation(metric labels.Labels) { } u.seriesInMetric.increaseSeriesForMetric(metricName) u.labelSetCounter.increaseSeriesLabelSet(u, metric) + if u.labelsStringInterningEnabled { + metric.InternStrings(u.interner.Intern) + } } // PostDeletion implements SeriesLifecycleCallback interface. @@ -439,6 +453,9 @@ func (u *userTSDB) PostDeletion(metrics map[chunks.HeadSeriesRef]labels.Labels) } u.seriesInMetric.decreaseSeriesForMetric(metricName) u.labelSetCounter.decreaseSeriesLabelSet(u, metric) + if u.labelsStringInterningEnabled { + metric.ReleaseStrings(u.interner.Release) + } } } @@ -2047,8 +2064,10 @@ func (i *Ingester) createTSDB(userID string) (*userTSDB, error) { ingestedAPISamples: util_math.NewEWMARate(0.2, i.cfg.RateUpdatePeriod), ingestedRuleSamples: util_math.NewEWMARate(0.2, i.cfg.RateUpdatePeriod), - instanceLimitsFn: i.getInstanceLimits, - instanceSeriesCount: &i.TSDBState.seriesCount, + instanceLimitsFn: i.getInstanceLimits, + instanceSeriesCount: &i.TSDBState.seriesCount, + interner: util.NewInterner(), + labelsStringInterningEnabled: i.cfg.LabelsStringInterningEnabled, } enableExemplars := false diff --git a/pkg/ingester/lifecycle_test.go b/pkg/ingester/lifecycle_test.go index d69be2f779..efa739b426 100644 --- a/pkg/ingester/lifecycle_test.go +++ b/pkg/ingester/lifecycle_test.go @@ -42,6 +42,7 @@ func defaultIngesterTestConfig(t testing.TB) Config { cfg.LifecyclerConfig.ID = "localhost" cfg.LifecyclerConfig.FinalSleep = 0 cfg.ActiveSeriesMetricsEnabled = true + cfg.LabelsStringInterningEnabled = true return cfg } diff --git a/pkg/util/strings.go b/pkg/util/strings.go index c085452286..ddc9de9ff9 100644 --- a/pkg/util/strings.go +++ b/pkg/util/strings.go @@ -6,6 +6,7 @@ import ( "unsafe" "github.com/bboreham/go-loser" + "go.uber.org/atomic" ) // StringsContain returns true if the search value is within the list of input values. @@ -139,3 +140,80 @@ func MergeSortedSlices(ctx context.Context, a ...[]string) ([]string, error) { } return r, nil } + +type Interner interface { + Intern(s string) string + Release(s string) +} + +// NewInterner returns a new Interner to be used to intern strings. +// Based on https://github.com/prometheus/prometheus/blob/726ed124e4468d0274ba89b0934a6cc8c975532d/storage/remote/intern.go#L51 +func NewInterner() Interner { + return &pool{ + pool: map[string]*entry{}, + } +} + +type pool struct { + mtx sync.RWMutex + pool map[string]*entry +} + +type entry struct { + refs atomic.Int64 + + s string +} + +func newEntry(s string) *entry { + return &entry{s: s} +} + +// Intern returns the interned string. It returns the canonical representation of string. +func (p *pool) Intern(s string) string { + if s == "" { + return "" + } + + p.mtx.RLock() + interned, ok := p.pool[s] + p.mtx.RUnlock() + if ok { + interned.refs.Inc() + return interned.s + } + p.mtx.Lock() + defer p.mtx.Unlock() + if interned, ok := p.pool[s]; ok { + interned.refs.Inc() + return interned.s + } + + p.pool[s] = newEntry(s) + p.pool[s].refs.Store(1) + return s +} + +// Release releases a reference of the string `s`. +// If the reference count become 0, the string `s` is removed from the memory +func (p *pool) Release(s string) { + p.mtx.RLock() + interned, ok := p.pool[s] + p.mtx.RUnlock() + + if !ok { + return + } + + refs := interned.refs.Dec() + if refs > 0 { + return + } + + p.mtx.Lock() + defer p.mtx.Unlock() + if interned.refs.Load() != 0 { + return + } + delete(p.pool, s) +}