Skip to content

Commit

Permalink
Adding experimental flag for Metrics Labels String Interning (cortexp…
Browse files Browse the repository at this point in the history
…roject#6057)

* Metrics Labels String Interning

Signed-off-by: alanprot <[email protected]>

* Changelog

Signed-off-by: alanprot <[email protected]>

* v1 guarantees

Signed-off-by: alanprot <[email protected]>

---------

Signed-off-by: alanprot <[email protected]>
  • Loading branch information
alanprot committed Jul 3, 2024
1 parent 4a82d36 commit 18e5fb3
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* [ENHANCEMENT] Store Gateway: Log gRPC requests together with headers configured in `http_request_headers_to_log`. #5958
* [ENHANCEMENT] Upgrade Alpine to 3.19. #6014
* [ENHANCEMENT] Upgrade go to 1.21.11 #6014
* [ENHANCEMENT] Ingester: Add a new experimental `-ingester.labels-string-interning-enabled` flag to enable string interning for metrics labels. #6057
* [BUGFIX] Configsdb: Fix endline issue in db password. #5920
* [BUGFIX] Ingester: Fix `user` and `type` labels for the `cortex_ingester_tsdb_head_samples_appended_total` TSDB metric. #5952
* [BUGFIX] Querier: Enforce max query length check for `/api/v1/series` API even though `ignoreMaxQueryLength` is set to true. #6018
Expand Down
4 changes: 4 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2971,6 +2971,10 @@ instance_limits:
# Customize the message contained in limit errors
# CLI flag: -ingester.admin-limit-message
[admin_limit_message: <string> | default = "please contact administrator to raise it"]
# Experimental: Enable string interning for metrics labels.
# CLI flag: -ingester.labels-string-interning-enabled
[labels_string_interning_enabled: <boolean> | default = false]
```

### `ingester_client_config`
Expand Down
2 changes: 2 additions & 0 deletions docs/configuration/v1-guarantees.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,5 @@ Currently experimental features are:
- `-ruler.ring.tokens-file-path` (path) CLI flag
- Native Histograms
- Ingestion can be enabled by setting `-blocks-storage.tsdb.enable-native-histograms=true` on Ingester.
- String interning for metrics labels
- Enable string interning for metrics labels by setting `-ingester.labels-string-interning-enabled` on Ingester.
23 changes: 21 additions & 2 deletions pkg/ingester/ingester.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ type Config struct {

// For admin contact details
AdminLimitMessage string `yaml:"admin_limit_message"`

LabelsStringInterningEnabled bool `yaml:"labels_string_interning_enabled"`
}

// RegisterFlags adds the flags required to config this to the given FlagSet
Expand All @@ -158,13 +160,18 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {

f.StringVar(&cfg.AdminLimitMessage, "ingester.admin-limit-message", "please contact administrator to raise it", "Customize the message contained in limit errors")

f.BoolVar(&cfg.LabelsStringInterningEnabled, "ingester.labels-string-interning-enabled", false, "Experimental: Enable string interning for metrics labels.")
}

// Validate checks the ingester configuration. It returns the error produced by
// validating the lifecycler configuration, if any. Otherwise it reports
// experimental use through logutil.WarnExperimentalUse when labels string
// interning is enabled, and returns nil.
func (cfg *Config) Validate() error {
	lifecyclerErr := cfg.LifecyclerConfig.Validate()
	if lifecyclerErr != nil {
		return lifecyclerErr
	}

	interningOn := cfg.LabelsStringInterningEnabled
	if interningOn {
		logutil.WarnExperimentalUse("String interning for metrics labels Enabled")
	}
	return nil
}

Expand Down Expand Up @@ -296,6 +303,10 @@ type userTSDB struct {
// Cached shipped blocks.
shippedBlocksMtx sync.Mutex
shippedBlocks map[ulid.ULID]struct{}

// Used to deduplicate label strings so that a single copy of each is kept in memory
labelsStringInterningEnabled bool
interner util.Interner
}

// Explicitly wrapping the tsdb.DB functions that we use.
Expand Down Expand Up @@ -425,6 +436,9 @@ func (u *userTSDB) PostCreation(metric labels.Labels) {
}
u.seriesInMetric.increaseSeriesForMetric(metricName)
u.labelSetCounter.increaseSeriesLabelSet(u, metric)
if u.labelsStringInterningEnabled {
metric.InternStrings(u.interner.Intern)
}
}

// PostDeletion implements SeriesLifecycleCallback interface.
Expand All @@ -439,6 +453,9 @@ func (u *userTSDB) PostDeletion(metrics map[chunks.HeadSeriesRef]labels.Labels)
}
u.seriesInMetric.decreaseSeriesForMetric(metricName)
u.labelSetCounter.decreaseSeriesLabelSet(u, metric)
if u.labelsStringInterningEnabled {
metric.ReleaseStrings(u.interner.Release)
}
}
}

Expand Down Expand Up @@ -2047,8 +2064,10 @@ func (i *Ingester) createTSDB(userID string) (*userTSDB, error) {
ingestedAPISamples: util_math.NewEWMARate(0.2, i.cfg.RateUpdatePeriod),
ingestedRuleSamples: util_math.NewEWMARate(0.2, i.cfg.RateUpdatePeriod),

instanceLimitsFn: i.getInstanceLimits,
instanceSeriesCount: &i.TSDBState.seriesCount,
instanceLimitsFn: i.getInstanceLimits,
instanceSeriesCount: &i.TSDBState.seriesCount,
interner: util.NewInterner(),
labelsStringInterningEnabled: i.cfg.LabelsStringInterningEnabled,
}

enableExemplars := false
Expand Down
1 change: 1 addition & 0 deletions pkg/ingester/lifecycle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ func defaultIngesterTestConfig(t testing.TB) Config {
cfg.LifecyclerConfig.ID = "localhost"
cfg.LifecyclerConfig.FinalSleep = 0
cfg.ActiveSeriesMetricsEnabled = true
cfg.LabelsStringInterningEnabled = true
return cfg
}

Expand Down
78 changes: 78 additions & 0 deletions pkg/util/strings.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"unsafe"

"github.com/bboreham/go-loser"
"go.uber.org/atomic"
)

// StringsContain returns true if the search value is within the list of input values.
Expand Down Expand Up @@ -139,3 +140,80 @@ func MergeSortedSlices(ctx context.Context, a ...[]string) ([]string, error) {
}
return r, nil
}

// Interner deduplicates strings by handing out one canonical copy of each
// distinct value and tracking how many references to it are outstanding.
type Interner interface {
// Intern returns the canonical copy of s and takes one reference on it.
Intern(s string) string
// Release gives back one reference to s previously taken with Intern.
Release(s string)
}

// NewInterner builds an empty, ready-to-use Interner backed by a
// reference-counted string pool.
// Based on https://github.com/prometheus/prometheus/blob/726ed124e4468d0274ba89b0934a6cc8c975532d/storage/remote/intern.go#L51
func NewInterner() Interner {
	p := new(pool)
	p.pool = make(map[string]*entry)
	return p
}

// pool is the Interner implementation returned by NewInterner: a map from a
// string value to its reference-counted entry, guarded by a read/write mutex.
type pool struct {
// mtx guards the pool map; the per-entry reference counters are atomic.
mtx sync.RWMutex
// pool maps each interned string to its entry.
pool map[string]*entry
}

// entry is one interned string together with its reference count.
type entry struct {
// refs counts outstanding references: Intern increments it and Release
// decrements it.
refs atomic.Int64

// s is the canonical copy of the string handed out by Intern.
s string
}

// newEntry allocates an entry holding s with a reference count of zero; the
// caller is responsible for setting the initial count.
func newEntry(s string) *entry {
	e := new(entry)
	e.s = s
	return e
}

// Intern returns the canonical copy of s held by the pool and takes one
// reference on it. If s is not yet in the pool it is added with a reference
// count of 1 and s itself becomes the canonical copy. Every successful Intern
// of a non-empty string should eventually be paired with a Release.
//
// The empty string is never pooled and is returned as-is.
func (p *pool) Intern(s string) string {
	if s == "" {
		return ""
	}

	// Fast path: the string is already pooled. The reference count must be
	// bumped while the read lock is still held. Release only deletes an entry
	// under the write lock after re-reading the count, so incrementing here
	// guarantees the entry cannot be removed between the lookup and the
	// increment (which would hand out a string that is no longer tracked).
	p.mtx.RLock()
	if interned, ok := p.pool[s]; ok {
		interned.refs.Inc()
		p.mtx.RUnlock()
		return interned.s
	}
	p.mtx.RUnlock()

	p.mtx.Lock()
	defer p.mtx.Unlock()
	// Re-check under the write lock: another goroutine may have inserted s
	// after the read lock was dropped.
	if interned, ok := p.pool[s]; ok {
		interned.refs.Inc()
		return interned.s
	}

	e := newEntry(s)
	e.refs.Store(1)
	p.pool[s] = e
	return s
}

// Release releases one reference to the string s.
// If the reference count becomes 0, the string s is removed from the pool.
// Releasing a string that is not in the pool (including the empty string,
// which is never pooled) is a no-op.
func (p *pool) Release(s string) {
	p.mtx.RLock()
	interned, ok := p.pool[s]
	p.mtx.RUnlock()

	if !ok {
		return
	}

	if interned.refs.Dec() > 0 {
		return
	}

	p.mtx.Lock()
	defer p.mtx.Unlock()
	// Between the decrement above and acquiring the write lock, a concurrent
	// Intern may have revived this entry, or the entry may already have been
	// deleted and a fresh one interned under the same key. Only delete when
	// the map still points at this exact entry and it is still unreferenced,
	// so a live entry is never removed.
	current, present := p.pool[s]
	if !present || current != interned {
		return
	}
	if interned.refs.Load() != 0 {
		return
	}
	delete(p.pool, s)
}

0 comments on commit 18e5fb3

Please sign in to comment.