Skip to content

Commit

Permalink
fix(qrm): fix calculateHintsForNUMABindingReclaimedCores and add some…
Browse files Browse the repository at this point in the history
… logs
  • Loading branch information
luomingmeng committed Oct 29, 2024
1 parent ecbd600 commit 90820ee
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 42 deletions.
31 changes: 19 additions & 12 deletions pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,41 +385,48 @@ func (p *DynamicPolicy) reclaimedCoresWithNUMABindingHintHandler(_ context.Conte
}
}

general.Infof("memory hints for pod:%s/%s, container: %s success, hints: %v",
req.PodNamespace, req.PodName, req.ContainerName, hints)

return util.PackResourceHintsResponse(req, string(v1.ResourceCPU), hints)
}

func (p *DynamicPolicy) calculateHintsForNUMABindingReclaimedCores(reqFloat float64, podEntries state.PodEntries,
machineState state.NUMANodeMap,
numaHeadroomState map[int]float64,
) (map[string]*pluginapi.ListOfTopologyHints, error) {
// Calculate the available CPU headroom for non-RNB (Reclaimed Non-Binding) NUMA nodes
nonBindingNUMAsCPUQuantity := machineState.GetFilteredAvailableHeadroom(numaHeadroomState, nil,
state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMABinding))

// Determine the set of NUMA nodes currently hosting non-RNB pods
nonBindingNUMAs := machineState.GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMABinding))
nonActualBindingNUMAs := machineState.GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMABinding))

// Calculate the total requested resources for non-RNB reclaimed pods
nonBindingReclaimedRequestedQuantity := state.GetRequestedQuantityFromPodEntries(podEntries,
nonActualBindingReclaimedRequestedQuantity := state.GetRequestedQuantityFromPodEntries(podEntries,
state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedNonActualNUMABinding),
p.getContainerRequestedCores)

// Compute the total available headroom for non-RNB NUMA nodes
nonBindingReclaimedNUMAHeadroom := state.GetReclaimedNUMAHeadroom(numaHeadroomState, nonBindingNUMAs)
nonActualBindingReclaimedNUMAHeadroom := state.GetReclaimedNUMAHeadroom(numaHeadroomState, nonActualBindingNUMAs)

// Identify candidate NUMA nodes for RNB (Reclaimed NUMA Binding) cores
// This includes both RNB NUMA nodes and NUMA nodes that can shrink from the non-RNB set
candidateNUMANodes := p.filterNUMANodesByNonBindingReclaimedRequestedQuantity(nonBindingReclaimedRequestedQuantity,
nonBindingNUMAsCPUQuantity, nonBindingNUMAs, numaHeadroomState)
candidateNUMANodes := p.filterNUMANodesByNonBindingReclaimedRequestedQuantity(nonActualBindingReclaimedRequestedQuantity,
nonActualBindingReclaimedNUMAHeadroom, nonActualBindingNUMAs, numaHeadroomState)

// Sort them based on the other qos numa binding pods and their headroom
p.sortCandidateNUMANodesForReclaimed(candidateNUMANodes, machineState, numaHeadroomState)

candidateLeft, maxCPULeft := p.calculateNUMANodesLeft(candidateNUMANodes, machineState, numaHeadroomState, reqFloat)

general.InfoS("nonActualBindingNUMAs", nonActualBindingNUMAs.String(),
"nonActualBindingReclaimedRequestedQuantity", nonActualBindingReclaimedRequestedQuantity,
"nonActualBindingReclaimedNUMAHeadroom", nonActualBindingReclaimedNUMAHeadroom,
"numaHeadroomState", numaHeadroomState,
"candidateNUMANodes", candidateNUMANodes,
"candidateLeft", candidateLeft,
"maxCPULeft", maxCPULeft)

hints := &pluginapi.ListOfTopologyHints{}

nonBindingReclaimedLeft := nonBindingReclaimedNUMAHeadroom - nonBindingReclaimedRequestedQuantity - reqFloat
nonBindingReclaimedLeft := nonActualBindingReclaimedNUMAHeadroom - nonActualBindingReclaimedRequestedQuantity - reqFloat
if maxCPULeft >= 0 {
p.populateBestEffortHintsByAvailableNUMANodes(hints, candidateNUMANodes, candidateLeft,
0)
Expand All @@ -429,8 +436,8 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingReclaimedCores(reqFloat floa
}

// Finally, add non-RNB NUMA nodes as preferred hints, but these will only be selected if no RNB NUMA nodes meet the requirements
if nonBindingNUMAs.Size() > 0 {
util.PopulatePreferHintsByNUMANodes(hints, nonBindingNUMAs.ToSliceInt())
if nonActualBindingNUMAs.Size() > 0 {
util.PopulatePreferHintsByNUMANodes(hints, nonActualBindingNUMAs.ToSliceInt())
}

return map[string]*pluginapi.ListOfTopologyHints{
Expand Down
22 changes: 0 additions & 22 deletions pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,20 +438,6 @@ func (ns *NUMANodeState) GetFilteredDefaultCPUSet(excludeEntry, excludeWholeNUMA
return res
}

// GetFilteredAvailableHeadroom reports how much of numaHeadroom remains for
// this NUMA node after walking every allocation stored in ns.PodEntries.
//
// For each allocation, excludeWholeNUMA is consulted first: as soon as it is
// non-nil and matches, the whole NUMA node is treated as unavailable and 0 is
// returned. Otherwise, when excludeEntry is non-nil and matches, that
// allocation's RequestQuantity is deducted from the running headroom.
// The result is not clamped and may therefore be negative.
func (ns *NUMANodeState) GetFilteredAvailableHeadroom(numaHeadroom float64, excludeEntry, excludeWholeNUMA func(ai *AllocationInfo) bool) float64 {
	remaining := numaHeadroom
	for _, entries := range ns.PodEntries {
		for _, ai := range entries {
			switch {
			case excludeWholeNUMA != nil && excludeWholeNUMA(ai):
				// A single matching allocation disqualifies the entire NUMA node.
				return 0
			case excludeEntry != nil && excludeEntry(ai):
				remaining -= ai.RequestQuantity
			}
		}
	}
	return remaining
}

// ExistMatchedAllocationInfo returns true if the stated predicate holds true for some pods of this numa else it returns false.
func (ns *NUMANodeState) ExistMatchedAllocationInfo(f func(ai *AllocationInfo) bool) bool {
for _, containerEntries := range ns.PodEntries {
Expand Down Expand Up @@ -539,14 +525,6 @@ func (nm NUMANodeMap) GetFilteredNUMASet(excludeNUMAPredicate func(ai *Allocatio
return res
}

// GetFilteredAvailableHeadroom sums the filtered available headroom of every
// NUMA node in nm.
//
// For each NUMA id, the starting headroom is looked up in numaHeadroom (ids
// missing from that map start from the zero value) and passed, together with
// both predicates, to the per-node GetFilteredAvailableHeadroom.
func (nm NUMANodeMap) GetFilteredAvailableHeadroom(numaHeadroom map[int]float64, excludeEntry, excludeWholeNUMA func(ai *AllocationInfo) bool) float64 {
	var total float64
	for numaID, nodeState := range nm {
		perNUMAHeadroom := numaHeadroom[numaID]
		total += nodeState.GetFilteredAvailableHeadroom(perNUMAHeadroom, excludeEntry, excludeWholeNUMA)
	}
	return total
}

// GetFilteredNUMASetWithAnnotations return numa set except the numa
// which are excluded by the predicate accepting AllocationInfo in the target NUMA and input annotations of candidate.
func (nm NUMANodeMap) GetFilteredNUMASetWithAnnotations(
Expand Down
21 changes: 13 additions & 8 deletions pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -467,32 +467,37 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingReclaimedCores(reqInt int64,
return nil, fmt.Errorf("calculateHints with empty machineState")
}

// Calculate the available CPU headroom for non-RNB (Reclaimed Non-Binding) NUMA nodes
nonActualBindingNUMAsMemoryQuantity := machineState.GetNonActualNUMABindingAvailableHeadroom(numaHeadroomState)

// Determine the set of NUMA nodes currently hosting non-RNB pods
nonActualBindingNUMAs := machineState.GetNUMANodesWithoutReclaimedActualNUMABindingPods()

// Calculate the total requested resources for non-RNB reclaimed pods
nonBindingReclaimedRequestedQuantity := state.GetRequestedQuantityFromPodEntries(podEntries,
nonActualBindingReclaimedRequestedQuantity := state.GetRequestedQuantityFromPodEntries(podEntries,
state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedNonActualNUMABinding))

// Compute the total available headroom for non-RNB NUMA nodes
nonBindingReclaimedNUMAHeadroom := state.GetReclaimedNUMAHeadroom(numaHeadroomState, nonActualBindingNUMAs)
nonActualBindingReclaimedNUMAHeadroom := state.GetReclaimedNUMAHeadroom(numaHeadroomState, nonActualBindingNUMAs)

// Identify candidate NUMA nodes for RNB (Reclaimed NUMA Binding) cores
// This includes both RNB NUMA nodes and NUMA nodes that can shrink from the non-RNB set
candidateNUMANodes := p.filterNUMANodesByNonBindingReclaimedRequestedQuantity(nonBindingReclaimedRequestedQuantity,
nonActualBindingNUMAsMemoryQuantity, nonActualBindingNUMAs, numaHeadroomState)
candidateNUMANodes := p.filterNUMANodesByNonBindingReclaimedRequestedQuantity(nonActualBindingReclaimedRequestedQuantity,
nonActualBindingReclaimedNUMAHeadroom, nonActualBindingNUMAs, numaHeadroomState)

// Sort candidate NUMA nodes based on the other qos numa binding pods and their headroom
p.sortCandidateNUMANodesForReclaimed(candidateNUMANodes, machineState, numaHeadroomState)

candidateLeft, maxMemoryLeft := p.calculateNUMANodesLeft(candidateNUMANodes, machineState, numaHeadroomState, reqInt)

general.InfoS("nonActualNUMABindingNUMAs", nonActualBindingNUMAs.String(),
"nonActualBindingReclaimedRequestedQuantity", nonActualBindingReclaimedRequestedQuantity,
"nonActualNUMABindingReclaimedNUMAHeadroom", nonActualBindingReclaimedNUMAHeadroom,
"numaHeadroomState", numaHeadroomState,
"candidateNUMANodes", candidateNUMANodes,
"candidateLeft", candidateLeft,
"maxMemoryLeft", maxMemoryLeft)

hints := &pluginapi.ListOfTopologyHints{}

nonBindingReclaimedLeft := nonBindingReclaimedNUMAHeadroom - nonBindingReclaimedRequestedQuantity - reqInt
nonBindingReclaimedLeft := nonActualBindingReclaimedNUMAHeadroom - nonActualBindingReclaimedRequestedQuantity - reqInt
if maxMemoryLeft >= 0 {
p.populateBestEffortHintsByAvailableNUMANodes(hints, candidateNUMANodes, candidateLeft,
0)
Expand Down

0 comments on commit 90820ee

Please sign in to comment.