Skip to content

Commit

Permalink
NETOBSERV-1954: fix under-estimation of traffic (#444)
Browse files Browse the repository at this point in the history
* NETOBSERV-1954: fix under-estimation of traffic

Remove flawed logic that ignores some flows, especially visible under
stress
This came from that time: https://github.com/netobserv/netobserv-ebpf-agent/pull/49/files#diff-f3248f855cc319b6d96125842a513586d6d8cbfb1e241a93e79911d08e41c728L334-L336

Old comment said: "...when a new flow maps to the same entry, it has some zombie entries." - this is weird and I'm not sure how that would be possible, flow_metrics entries should be zeroed as we're using designed initializers. In any case, relying on a single eviction time reference is wrong because flows are evicted one by one, resulting in many new flows having their timer set before the "last eviction time".

If we find evidence of those "zombie entries" then we must find another
way to fix them.

* do not set startTime=0
  • Loading branch information
jotak authored Nov 12, 2024
1 parent 8424fa8 commit 0de6a01
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 15 deletions.
14 changes: 0 additions & 14 deletions pkg/flow/tracer_map.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ type MapTracer struct {
staleEntriesEvictTimeout time.Duration
// manages the access to the eviction routines, avoiding two evictions happening at the same time
evictionCond *sync.Cond
lastEvictionNs uint64
metrics *metrics.Metrics
timeSpentinLookupAndDelete prometheus.Histogram
}
Expand All @@ -40,7 +39,6 @@ func NewMapTracer(fetcher mapFetcher, evictionTimeout, staleEntriesEvictTimeout
return &MapTracer{
mapFetcher: fetcher,
evictionTimeout: evictionTimeout,
lastEvictionNs: uint64(monotime.Now()),
evictionCond: sync.NewCond(&sync.Mutex{}),
staleEntriesEvictTimeout: staleEntriesEvictTimeout,
metrics: m,
Expand Down Expand Up @@ -101,7 +99,6 @@ func (m *MapTracer) evictFlows(ctx context.Context, forceGC bool, forwardFlows c
currentTime := time.Now()

var forwardingFlows []*model.Record
laterFlowNs := uint64(0)
flows := m.mapFetcher.LookupAndDeleteMap(m.metrics)
elapsed := time.Since(currentTime)
for flowKey, flowMetrics := range flows {
Expand All @@ -110,10 +107,6 @@ func (m *MapTracer) evictFlows(ctx context.Context, forceGC bool, forwardFlows c
if aggregatedMetrics.EndMonoTimeTs == 0 {
continue
}
// If it iterated an entry that do not have updated flows
if aggregatedMetrics.EndMonoTimeTs > laterFlowNs {
laterFlowNs = aggregatedMetrics.EndMonoTimeTs
}
forwardingFlows = append(forwardingFlows, model.NewRecord(
flowKey,
aggregatedMetrics,
Expand All @@ -122,7 +115,6 @@ func (m *MapTracer) evictFlows(ctx context.Context, forceGC bool, forwardFlows c
))
}
m.mapFetcher.DeleteMapsStaleEntries(m.staleEntriesEvictTimeout)
m.lastEvictionNs = laterFlowNs
select {
case <-ctx.Done():
mtlog.Debug("skipping flow eviction as agent is being stopped")
Expand All @@ -146,12 +138,6 @@ func (m *MapTracer) aggregate(metrics []ebpf.BpfFlowMetrics) *ebpf.BpfFlowMetric
}
aggr := &ebpf.BpfFlowMetrics{}
for i := range metrics {
// eBPF hashmap values are not zeroed when the entry is removed. That causes that we
// might receive entries from previous collect-eviction timeslots.
// We need to check the flow time and discard old flows.
if metrics[i].StartMonoTimeTs <= m.lastEvictionNs || metrics[i].EndMonoTimeTs <= m.lastEvictionNs {
continue
}
model.Accumulate(aggr, &metrics[i])
}
return aggr
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/record.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func NewRecord(

func Accumulate(r *ebpf.BpfFlowMetrics, src *ebpf.BpfFlowMetrics) {
// time == 0 if the value has not been yet set
if r.StartMonoTimeTs == 0 || r.StartMonoTimeTs > src.StartMonoTimeTs {
if r.StartMonoTimeTs == 0 || (r.StartMonoTimeTs > src.StartMonoTimeTs && src.StartMonoTimeTs != 0) {
r.StartMonoTimeTs = src.StartMonoTimeTs
}
if r.EndMonoTimeTs == 0 || r.EndMonoTimeTs < src.EndMonoTimeTs {
Expand Down

0 comments on commit 0de6a01

Please sign in to comment.