-
Notifications
You must be signed in to change notification settings - Fork 4.2k
Feat: partitionable devices support #8559
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 16 commits
9dc8e0f
9a450a1
ff1d30f
d0f230e
e92825e
862482f
bb5c8d9
f9f397a
4453bf2
6e4f48b
85a0d94
b09676c
212869b
4fa5202
9f2c8db
3956443
c61b8ad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,7 @@ import ( | |
|
|
||
| v1 "k8s.io/api/core/v1" | ||
| resourceapi "k8s.io/api/resource/v1" | ||
| "k8s.io/apimachinery/pkg/api/resource" | ||
| "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" | ||
| ) | ||
|
|
||
|
|
@@ -43,7 +44,7 @@ func CalculateDynamicResourceUtilization(nodeInfo *framework.NodeInfo) (map[stri | |
| poolDevices := getAllDevices(currentSlices) | ||
| allocatedDeviceNames := allocatedDevices[driverName][poolName] | ||
| unallocated, allocated := splitDevicesByAllocation(poolDevices, allocatedDeviceNames) | ||
| result[driverName][poolName] = calculatePoolUtil(unallocated, allocated) | ||
| result[driverName][poolName] = calculatePoolUtil(unallocated, allocated, currentSlices) | ||
| } | ||
| } | ||
| return result, nil | ||
|
|
@@ -69,10 +70,80 @@ func HighestDynamicResourceUtilization(nodeInfo *framework.NodeInfo) (v1.Resourc | |
| return highestResourceName, highestUtil, nil | ||
| } | ||
|
|
||
| func calculatePoolUtil(unallocated, allocated []resourceapi.Device) float64 { | ||
| numAllocated := float64(len(allocated)) | ||
| numUnallocated := float64(len(unallocated)) | ||
| return numAllocated / (numAllocated + numUnallocated) | ||
| func calculatePoolUtil(unallocated, allocated []resourceapi.Device, resourceSlices []*resourceapi.ResourceSlice) float64 { | ||
| TotalConsumedCounters := map[string]map[string]resource.Quantity{} | ||
| for _, resourceSlice := range resourceSlices { | ||
| for _, sharedCounter := range resourceSlice.Spec.SharedCounters { | ||
| if _, ok := TotalConsumedCounters[sharedCounter.Name]; !ok { | ||
| TotalConsumedCounters[sharedCounter.Name] = map[string]resource.Quantity{} | ||
| } | ||
| for counter, value := range sharedCounter.Counters { | ||
| TotalConsumedCounters[sharedCounter.Name][counter] = value.Value | ||
| } | ||
| } | ||
| } | ||
| allocatedConsumedCounters := calculateConsumedCounters(allocated) | ||
|
|
||
| // not all devices are partitionable, so fallback to the ratio of non-partionable devices | ||
| allocatedDevicesWithoutCounters := 0 | ||
| devicesWithoutCounters := 0 | ||
|
|
||
| for _, device := range allocated { | ||
| if device.ConsumesCounters == nil { | ||
| devicesWithoutCounters++ | ||
| allocatedDevicesWithoutCounters++ | ||
| } | ||
| } | ||
| for _, device := range unallocated { | ||
| if device.ConsumesCounters == nil { | ||
| devicesWithoutCounters++ | ||
| } | ||
| } | ||
|
|
||
| // we want to find the counter that is most utilized, since it is the "bottleneck" of the pool | ||
| var maxUtilization float64 | ||
| if devicesWithoutCounters == 0 { | ||
| maxUtilization = 0 | ||
| } else { | ||
| maxUtilization = float64(allocatedDevicesWithoutCounters) / float64(devicesWithoutCounters) | ||
| } | ||
| for counterSet, counters := range TotalConsumedCounters { | ||
| for counterName, totalValue := range counters { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: is this easier to follow? (rather then checking for
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, this is wayyy cleaner. I'll change it |
||
| if allocatedSet, exists := allocatedConsumedCounters[counterSet]; exists { | ||
| if allocatedValue, exists := allocatedSet[counterName]; exists && !totalValue.IsZero() { | ||
| utilization := float64(allocatedValue.Value()) / float64(totalValue.Value()) | ||
| if utilization > maxUtilization { | ||
|
||
| maxUtilization = utilization | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| return maxUtilization | ||
| } | ||
|
|
||
| // calculateConsumedCounters calculates the total counters consumed by a list of devices | ||
| func calculateConsumedCounters(devices []resourceapi.Device) map[string]map[string]resource.Quantity { | ||
| countersConsumed := map[string]map[string]resource.Quantity{} | ||
| for _, device := range devices { | ||
| if device.ConsumesCounters == nil { | ||
| continue | ||
| } | ||
| for _, consumedCounter := range device.ConsumesCounters { | ||
| if _, ok := countersConsumed[consumedCounter.CounterSet]; !ok { | ||
| countersConsumed[consumedCounter.CounterSet] = map[string]resource.Quantity{} | ||
| } | ||
| for counter, value := range consumedCounter.Counters { | ||
| if _, ok := countersConsumed[consumedCounter.CounterSet][counter]; !ok { | ||
| countersConsumed[consumedCounter.CounterSet][counter] = resource.Quantity{} | ||
| } | ||
| v := countersConsumed[consumedCounter.CounterSet][counter] | ||
| v.Add(value.Value) | ||
| countersConsumed[consumedCounter.CounterSet][counter] = v | ||
| } | ||
| } | ||
| } | ||
| return countersConsumed | ||
| } | ||
|
|
||
| func splitDevicesByAllocation(devices []resourceapi.Device, allocatedNames []string) (unallocated, allocated []resourceapi.Device) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a chance that more than one resource slice from the passed in
resourceSliceswill have a counterCounterSetwith the same name (Nameproperty)? That would be the only reason to check for existence before initializingTotalConsumedCounters[sharedCounter.Name] = map[string]resource.Quantity{}. Also, if that's true, are we confident that they won't have any collisions with any of the names of theCountersin theirCounters map[string]Counter? Otherwise we're overwriting them below.tl;dr we may be able to simplify this and simply assign
TotalConsumedCounters[sharedCounter.Name] = map[string]resource.Quantity{}without having to first check if it's already there, or if not, there may be more checks.I did this and UT still pass:
$ git diff diff --git a/cluster-autoscaler/simulator/dynamicresources/utils/utilization.go b/cluster-autoscaler/simulator/dynamicresources/utils/utilization.go index c717fdfd6..98f7480a6 100644 --- a/cluster-autoscaler/simulator/dynamicresources/utils/utilization.go +++ b/cluster-autoscaler/simulator/dynamicresources/utils/utilization.go @@ -74,9 +74,7 @@ func calculatePoolUtil(unallocated, allocated []resourceapi.Device, resourceSlic TotalConsumedCounters := map[string]map[string]resource.Quantity{} for _, resourceSlice := range resourceSlices { for _, sharedCounter := range resourceSlice.Spec.SharedCounters { - if _, ok := TotalConsumedCounters[sharedCounter.Name]; !ok { - TotalConsumedCounters[sharedCounter.Name] = map[string]resource.Quantity{} - } + TotalConsumedCounters[sharedCounter.Name] = map[string]resource.Quantity{} for counter, value := range sharedCounter.Counters { TotalConsumedCounters[sharedCounter.Name][counter] = value.Value }Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My impression from the KEP is that there shouldn't be any collisions in counterset names, since these are unique within a resource pool.
I got the impression that there could be a collision of the same sharedcounter from 2 different resource pools, but this would be high improbable since it'd imply that the same exact device (same device ID) appears in multiple resource pools.
Since this code is within a pool's scope, I think I'll simplify it the way you suggested