Skip to content

Commit

Permalink
feat: add MaxNodeUtilizationPercent in ReclaimedResourceConfiguration
Browse files Browse the repository at this point in the history
  • Loading branch information
WangZzzhe committed Aug 28, 2024
1 parent cb9c984 commit dbc797f
Show file tree
Hide file tree
Showing 10 changed files with 225 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ type ReclaimedResourceOptions struct {
MinReclaimedResourceForReport general.ResourceList
ReservedResourceForAllocate general.ResourceList
ReservedResourceForReclaimedCores general.ResourceList
MaxNodeUtilizationPercent map[string]int64

*cpuheadroom.CPUHeadroomOptions
*memoryheadroom.MemoryHeadroomOptions
Expand All @@ -58,8 +59,9 @@ func NewReclaimedResourceOptions() *ReclaimedResourceOptions {
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("0"),
},
CPUHeadroomOptions: cpuheadroom.NewCPUHeadroomOptions(),
MemoryHeadroomOptions: memoryheadroom.NewMemoryHeadroomOptions(),
MaxNodeUtilizationPercent: map[string]int64{},
CPUHeadroomOptions: cpuheadroom.NewCPUHeadroomOptions(),
MemoryHeadroomOptions: memoryheadroom.NewMemoryHeadroomOptions(),
}
}

Expand All @@ -77,6 +79,8 @@ func (o *ReclaimedResourceOptions) AddFlags(fss *cliflag.NamedFlagSets) {
"reserved reclaimed resource actually not allocate to reclaimed resource")
fs.Var(&o.ReservedResourceForReclaimedCores, "reserved-resource-for-reclaimed-cores",
"reserved resources for reclaimed_cores pods")
fs.StringToInt64Var(&o.MaxNodeUtilizationPercent, "max-node-utilization-percent", o.MaxNodeUtilizationPercent,
"node utilization resource limit for reclaimed pool")

o.CPUHeadroomOptions.AddFlags(fss)
o.MemoryHeadroomOptions.AddFlags(fss)
Expand All @@ -91,6 +95,12 @@ func (o *ReclaimedResourceOptions) ApplyTo(c *reclaimedresource.ReclaimedResourc
c.ReservedResourceForAllocate = v1.ResourceList(o.ReservedResourceForAllocate)
c.MinReclaimedResourceForAllocate = v1.ResourceList(o.ReservedResourceForReclaimedCores)

maxNodeUtilizationPercent := make(map[v1.ResourceName]int64)
for resourceName, value := range o.MaxNodeUtilizationPercent {
maxNodeUtilizationPercent[v1.ResourceName(resourceName)] = value
}
c.MaxNodeUtilizationPercent = maxNodeUtilizationPercent

errList = append(errList, o.CPUHeadroomOptions.ApplyTo(c.CPUHeadroomConfiguration))
errList = append(errList, o.MemoryHeadroomOptions.ApplyTo(c.MemoryHeadroomConfiguration))
return errors.NewAggregate(errList)
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ require (
)

replace (
github.com/kubewharf/katalyst-api => github.com/WangZzzhe/katalyst-api v0.0.0-20240828081900-5e532fa0166c
k8s.io/api => k8s.io/api v0.24.6
k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6
k8s.io/apimachinery => k8s.io/apimachinery v0.24.6
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWX
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg=
github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=
github.com/WangZzzhe/katalyst-api v0.0.0-20240828081900-5e532fa0166c h1:IlDjOO40SUWMITcOf9/f2enNXzD/BVMt/UtV6nNY4T4=
github.com/WangZzzhe/katalyst-api v0.0.0-20240828081900-5e532fa0166c/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c=
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
Expand Down Expand Up @@ -568,8 +570,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kubewharf/katalyst-api v0.5.1-0.20240820031712-7c1239991078 h1:CSBXQOe0AzlWcGaww8uqOUDu+/4bL3hVNBz86oziOis=
github.com/kubewharf/katalyst-api v0.5.1-0.20240820031712-7c1239991078/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
github.com/kubewharf/kubelet v1.24.6-kubewharf.9 h1:jOTYZt7h/J7I8xQMKMUcJjKf5UFBv37jHWvNp5VRFGc=
github.com/kubewharf/kubelet v1.24.6-kubewharf.9/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"time"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"

Expand Down Expand Up @@ -626,5 +627,8 @@ func (p *CPUPressureLoadEviction) checkPressureWithAdvisedThreshold() bool {
// for now, we consider ReservedResourceForAllocate as downgrading or manual intervention configuration,
// when it's set to a value greater than zero, fall back to static threshold
dynamicConfiguration := p.dynamicConf.GetDynamicConfiguration()
return dynamicConfiguration.EnableReclaim && dynamicConfiguration.ReservedResourceForAllocate.Cpu().Value() == 0
reservedResourceForAllocate := dynamicConfiguration.GetReservedResourceForAllocate(v1.ResourceList{
v1.ResourceCPU: *resource.NewQuantity(int64(p.metaServer.NumCPUs), resource.DecimalSI),
})
return dynamicConfiguration.EnableReclaim && reservedResourceForAllocate.Cpu().Value() == 0
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"fmt"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
"k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
Expand Down Expand Up @@ -174,7 +175,10 @@ func (cra *cpuResourceAdvisor) updateNumasAvailableResource() {
}

func (cra *cpuResourceAdvisor) getNumasReservedForAllocate(numas machine.CPUSet) float64 {
reserved := cra.conf.GetDynamicConfiguration().ReservedResourceForAllocate[v1.ResourceCPU]
reserved := cra.conf.GetDynamicConfiguration().GetReservedResourceForAllocate(v1.ResourceList{
v1.ResourceCPU: *resource.NewQuantity(int64(cra.metaServer.NumCPUs), resource.DecimalSI),
})[v1.ResourceCPU]

return float64(reserved.Value()*int64(numas.Size())) / float64(cra.metaServer.NumNUMANodes)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ func (ha *HeadroomAssemblerCommon) GetHeadroom() (resource.Quantity, error) {
return *resource.NewQuantity(0, resource.DecimalSI), nil
}

reserved := ha.conf.GetDynamicConfiguration().ReservedResourceForAllocate[v1.ResourceCPU]
reserved := ha.conf.GetDynamicConfiguration().GetReservedResourceForAllocate(v1.ResourceList{
v1.ResourceCPU: *resource.NewQuantity(int64(ha.metaServer.NumCPUs), resource.DecimalSI),
})[v1.ResourceCPU]
headroomTotal := 0.0
emptyNUMAs := ha.metaServer.CPUDetails.NUMANodes()
exclusiveNUMAs := machine.NewCPUSet()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,9 @@ func (ra *memoryResourceAdvisor) update() error {
return fmt.Errorf("meta reader has not synced")
}

reservedForAllocate := ra.conf.GetDynamicConfiguration().
ReservedResourceForAllocate[v1.ResourceMemory]
reservedForAllocate := ra.conf.GetDynamicConfiguration().GetReservedResourceForAllocate(v1.ResourceList{
v1.ResourceMemory: *resource.NewQuantity(int64(ra.metaServer.MemoryCapacity), resource.BinarySI),
})[v1.ResourceMemory]

for _, headroomPolicy := range ra.headroomPolices {
// capacity and reserved can both be adjusted dynamically during running process
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"sync"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"

"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/memoryadvisor"
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/metacache"
Expand Down Expand Up @@ -96,8 +97,9 @@ func (m *memoryProvisioner) initializeMemoryProvisioner() error {
}

func (m *memoryProvisioner) Reconcile(status *types.MemoryPressureStatus) (err error) {
reservedForAllocate := m.conf.GetDynamicConfiguration().
ReservedResourceForAllocate[v1.ResourceMemory]
reservedForAllocate := m.conf.GetDynamicConfiguration().GetReservedResourceForAllocate(v1.ResourceList{
v1.ResourceMemory: *resource.NewQuantity(int64(m.metaServer.MemoryCapacity), resource.BinarySI),
})[v1.ResourceMemory]
m.policy.SetEssentials(
types.ResourceEssentials{
EnableReclaim: m.conf.GetDynamicConfiguration().EnableReclaim,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ package reclaimedresource

import (
v1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"

"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/reclaimedresource/cpuheadroom"
"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/reclaimedresource/memoryheadroom"
"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/crd"
"github.com/kubewharf/katalyst-core/pkg/util/native"
)

type ReclaimedResourceConfiguration struct {
Expand All @@ -30,6 +32,7 @@ type ReclaimedResourceConfiguration struct {
MinReclaimedResourceForReport v1.ResourceList
ReservedResourceForAllocate v1.ResourceList
MinReclaimedResourceForAllocate v1.ResourceList
MaxNodeUtilizationPercent map[v1.ResourceName]int64

*cpuheadroom.CPUHeadroomConfiguration
*memoryheadroom.MemoryHeadroomConfiguration
Expand Down Expand Up @@ -72,8 +75,50 @@ func (c *ReclaimedResourceConfiguration) ApplyConfiguration(conf *crd.DynamicCon
c.MinReclaimedResourceForAllocate[resourceName] = value
}
}

if config.MaxNodeUtilizationPercent != nil {
for resourceName, value := range config.MaxNodeUtilizationPercent {
c.MaxNodeUtilizationPercent[resourceName] = value
}
}
}

c.CPUHeadroomConfiguration.ApplyConfiguration(conf)
c.MemoryHeadroomConfiguration.ApplyConfiguration(conf)
}

func (c *ReclaimedResourceConfiguration) GetReservedResourceForAllocate(nodeResourceList v1.ResourceList) v1.ResourceList {
if len(c.MaxNodeUtilizationPercent) == 0 {
return c.ReservedResourceForAllocate
}

res := v1.ResourceList{}
for resource, quantity := range c.ReservedResourceForAllocate {
nodeAllocatable, ok := nodeResourceList[resource]
if !ok {
res[resource] = quantity
continue
}

maxUtil, ok := c.MaxNodeUtilizationPercent[resource]
if !ok {
res[resource] = quantity
continue
}
if maxUtil <= 0 || maxUtil > 100 {
klog.Warningf("unsupported MaxNodeUtilizationPercent, resourceName: %v, value: %v", resource, maxUtil)
res[resource] = quantity
continue
}

nodeAllocatableCopy := nodeAllocatable.DeepCopy()
nodeAllocatableCopy.Sub(native.MultiplyResourceQuantity(resource, nodeAllocatableCopy, float64(maxUtil)/100.0))
res[resource] = nodeAllocatableCopy

klog.V(6).Infof("GetReservedResourceForAllocate resource: %v, nodeResource: %v, "+
"MaxNodeUtilizationPercent: %v, ReservedResourceForAllocate: %v, res: %v",
resource, nodeAllocatable.String(), maxUtil, quantity.String(), nodeAllocatableCopy.String())
}

return res
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
Copyright 2022 The Katalyst Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package reclaimedresource

import (
"testing"

"github.com/stretchr/testify/assert"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)

func TestGetReservedResourceForAllocate(t *testing.T) {
t.Parallel()

for _, tc := range []struct {
name string
config *ReclaimedResourceConfiguration
nodeResourceList v1.ResourceList
expectRes v1.ResourceList
}{
{
name: "MaxNodeUtilizationPercent not set",
config: &ReclaimedResourceConfiguration{
ReservedResourceForAllocate: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
},
nodeResourceList: map[v1.ResourceName]resource.Quantity{},
expectRes: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
},
{
name: "MaxNodeUtilizationPercent set, only get cpu",
config: &ReclaimedResourceConfiguration{
ReservedResourceForAllocate: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
MaxNodeUtilizationPercent: map[v1.ResourceName]int64{
v1.ResourceCPU: 60,
v1.ResourceMemory: 80,
},
},
nodeResourceList: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("32"),
},
expectRes: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("12800m"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
},
{
name: "MaxNodeUtilizationPercent set, only get memory",
config: &ReclaimedResourceConfiguration{
ReservedResourceForAllocate: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
MaxNodeUtilizationPercent: map[v1.ResourceName]int64{
v1.ResourceCPU: 60,
v1.ResourceMemory: 80,
},
},
nodeResourceList: map[v1.ResourceName]resource.Quantity{
v1.ResourceMemory: resource.MustParse("100Gi"),
},
expectRes: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("20Gi"),
},
},
{
name: "MaxNodeUtilizationPercent only cpu set",
config: &ReclaimedResourceConfiguration{
ReservedResourceForAllocate: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
MaxNodeUtilizationPercent: map[v1.ResourceName]int64{
v1.ResourceMemory: 80,
},
},
nodeResourceList: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("32"),
},
expectRes: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
},
{
name: "MaxNodeUtilizationPercent value not supported",
config: &ReclaimedResourceConfiguration{
ReservedResourceForAllocate: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
MaxNodeUtilizationPercent: map[v1.ResourceName]int64{
v1.ResourceCPU: 120,
v1.ResourceMemory: 0,
},
},
nodeResourceList: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("32"),
v1.ResourceMemory: resource.MustParse("100Gi"),
},
expectRes: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
},
} {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()

res := tc.config.GetReservedResourceForAllocate(tc.nodeResourceList)
assert.Equal(t, len(tc.expectRes), len(res))
for resource, quantity := range res {
expectQuan, ok := tc.expectRes[resource]
assert.True(t, ok)

assert.True(t, quantity.Equal(expectQuan))
}
})
}
}

0 comments on commit dbc797f

Please sign in to comment.