Skip to content

Commit

Permalink
Merge pull request #614 from sapcc/resource_topology
Browse files Browse the repository at this point in the history
Add support for AZ-separated quotas
  • Loading branch information
majewsky authored Dec 9, 2024
2 parents e92acb9 + 8e72ed5 commit 072f27e
Show file tree
Hide file tree
Showing 24 changed files with 567 additions and 103 deletions.
3 changes: 1 addition & 2 deletions docs/operators/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ Some special behaviors for resources can be configured in the `resource_behavior
| `resource_behavior[].resource` | yes | Must contain a regex. The behavior entry applies to all resources where this regex matches against a slash-concatenated pair of service type and resource name. The anchors `^` and `$` are implied at both ends, so the regex must match the entire phrase. |
| `resource_behavior[].overcommit_factor` | no | If given, capacity for matching resources will be computed as `raw_capacity * overcommit_factor`, where `raw_capacity` is what the capacity plugin reports. |
| `resource_behavior[].commitment_durations` | no | If given, commitments for this resource can be created with any of the given durations. The duration format is the same as in the `commitments[].duration` attribute that appears on the resource API. If empty, this resource does not accept commitments. |
| `resource_behavior[].commitment_is_az_aware` | no | If true, commitments for this resource must be created in a specific AZ (i.e. not in a pseudo-AZ). If false, commitments for this resource must be created in the pseudo-AZ `any`. Ignored if `commitment_durations` is empty. |
| `resource_behavior[].commitment_min_confirm_date` | no | If given, commitments for this resource will always be created with `confirm_by` no earlier than this timestamp. This can be used to plan the introduction of commitments on a specific date. Ignored if `commitment_durations` is empty. |
| `resource_behavior[].commitment_until_percent` | no | If given, commitments for this resource will only be confirmed while the total of all confirmed commitments or uncommitted usage in the respective AZ is smaller than the respective percentage of the total capacity for that AZ. This is intended to provide a reserved buffer for the growth quota configured by `quota_distribution_configs[].autogrow.growth_multiplier`. Defaults to 100, i.e. all capacity is committable. |
| `resource_behavior[].commitment_conversion.identifier` | no | If given, must contain a string. Commitments for this resource will then be allowed to be converted into commitments for all resources that set the same conversion identifier. |
Expand All @@ -147,7 +146,7 @@ resource_behavior:
# matches both sharev2/share_capacity and sharev2/snapshot_capacity
- { resource: sharev2/.*_capacity, overcommit_factor: 2 }
# starting in 2024, offer commitments for Cinder storage
- { resource: volumev2/capacity, commitment_durations: [ 1 year, 2 years, 3 years ], commitment_is_az_aware: true, commitment_min_confirm_date: 2024-01-01T00:00:00Z }
- { resource: volumev2/capacity, commitment_durations: [ 1 year, 2 years, 3 years ], commitment_min_confirm_date: 2024-01-01T00:00:00Z }
# an Ironic flavor has been renamed from "thebigbox" to "baremetal_large"
- { resource: compute/instances_baremetal_large, identity_in_v1_api: compute/instances_thebigbox }
```
Expand Down
4 changes: 0 additions & 4 deletions internal/api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,6 @@ const (
- resource: 'shared/(capacity|things)$'
commitment_durations: ["1 hour", "2 hours"]
commitment_min_confirm_date: '1970-01-08T00:00:00Z' # one week after start of mock.Clock
- resource: 'shared/capacity$'
commitment_is_az_aware: true
- resource: shared/things
commitment_is_az_aware: false
`
)

Expand Down
12 changes: 7 additions & 5 deletions internal/api/commitment.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"github.com/sapcc/go-api-declarations/cadf"
"github.com/sapcc/go-api-declarations/limes"
limesresources "github.com/sapcc/go-api-declarations/limes/resources"
"github.com/sapcc/go-api-declarations/liquid"
"github.com/sapcc/go-bits/audittools"
"github.com/sapcc/go-bits/gopherpolicy"
"github.com/sapcc/go-bits/httpapi"
Expand Down Expand Up @@ -234,18 +235,19 @@ func (p *v1Provider) parseAndValidateCommitmentRequest(w http.ResponseWriter, r
return nil, nil, nil
}
behavior := p.Cluster.BehaviorForResource(dbServiceType, dbResourceName)
resInfo := p.Cluster.InfoForResource(dbServiceType, dbResourceName)
if len(behavior.CommitmentDurations) == 0 {
http.Error(w, "commitments are not enabled for this resource", http.StatusUnprocessableEntity)
return nil, nil, nil
}
if behavior.CommitmentIsAZAware {
if !slices.Contains(p.Cluster.Config.AvailabilityZones, req.AvailabilityZone) {
http.Error(w, "no such availability zone", http.StatusUnprocessableEntity)
if resInfo.Topology == liquid.FlatResourceTopology {
if req.AvailabilityZone != limes.AvailabilityZoneAny {
http.Error(w, `resource does not accept AZ-aware commitments, so the AZ must be set to "any"`, http.StatusUnprocessableEntity)
return nil, nil, nil
}
} else {
if req.AvailabilityZone != limes.AvailabilityZoneAny {
http.Error(w, `resource does not accept AZ-aware commitments, so the AZ must be set to "any"`, http.StatusUnprocessableEntity)
if !slices.Contains(p.Cluster.Config.AvailabilityZones, req.AvailabilityZone) {
http.Error(w, "no such availability zone", http.StatusUnprocessableEntity)
return nil, nil, nil
}
}
Expand Down
21 changes: 9 additions & 12 deletions internal/api/commitment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@ import (
"testing"
"time"

"github.com/sapcc/go-api-declarations/liquid"
"github.com/sapcc/go-bits/assert"

"github.com/sapcc/limes/internal/db"
"github.com/sapcc/limes/internal/test"
"github.com/sapcc/limes/internal/test/plugins"
)

const day = 24 * time.Hour
Expand All @@ -47,10 +49,6 @@ const testCommitmentsYAML = `
- resource: first/.*
commitment_durations: ["1 hour", "2 hours"]
commitment_min_confirm_date: '1970-01-08T00:00:00Z' # one week after start of mock.Clock
- resource: first/things
commitment_is_az_aware: false
- resource: first/capacity
commitment_is_az_aware: true
`
const testCommitmentsYAMLWithoutMinConfirmDate = `
availability_zones: [ az-one, az-two ]
Expand All @@ -65,12 +63,6 @@ const testCommitmentsYAMLWithoutMinConfirmDate = `
# the resources in "first" have commitments, the ones in "second" do not
- resource: second/.*
commitment_durations: ["1 hour", "2 hours", "3 hours"]
- resource: second/things
commitment_is_az_aware: false
- resource: second/capacity
commitment_is_az_aware: true
- resource: second/capacity_portion
commitment_is_az_aware: true
`

const testConvertCommitmentsYAML = `
Expand All @@ -95,10 +87,8 @@ const testConvertCommitmentsYAML = `
- resource: third/.*
commitment_durations: ["1 hour", "2 hours"]
- resource: first/capacity
commitment_is_az_aware: true
commitment_conversion: {identifier: flavor1, weight: 48}
- resource: second/capacity
commitment_is_az_aware: true
commitment_conversion: {identifier: flavor1, weight: 32}
- resource: third/capacity_c32
commitment_conversion: {identifier: flavor1, weight: 32}
Expand All @@ -118,6 +108,10 @@ func TestCommitmentLifecycleWithDelayedConfirmation(t *testing.T) {
test.WithConfig(testCommitmentsYAML),
test.WithAPIHandler(NewV1API),
)
plugin := s.Cluster.QuotaPlugins["first"].(*plugins.GenericQuotaPlugin)
plugin2 := s.Cluster.QuotaPlugins["second"].(*plugins.GenericQuotaPlugin)
plugin.LiquidServiceInfo.Resources = map[liquid.ResourceName]liquid.ResourceInfo{"capacity": {Topology: liquid.AZAwareResourceTopology}, "things": {Topology: liquid.FlatResourceTopology}}
plugin2.LiquidServiceInfo.Resources = map[liquid.ResourceName]liquid.ResourceInfo{"capacity": {Topology: liquid.AZAwareResourceTopology}, "things": {Topology: liquid.FlatResourceTopology}}

// GET returns an empty list if there are no commitments
assert.HTTPRequest{
Expand Down Expand Up @@ -477,6 +471,9 @@ func TestPutCommitmentErrorCases(t *testing.T) {
test.WithAPIHandler(NewV1API),
)

plugin := s.Cluster.QuotaPlugins["first"].(*plugins.GenericQuotaPlugin)
plugin.LiquidServiceInfo.Resources = map[liquid.ResourceName]liquid.ResourceInfo{"things": {Topology: liquid.FlatResourceTopology}}

request := assert.JSONObject{
"service_type": "first",
"resource_name": "capacity",
Expand Down
3 changes: 2 additions & 1 deletion internal/collector/capacity_scrape.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ func (c *Collector) processCapacityScrapeTask(ctx context.Context, task capacity

func (c *Collector) confirmPendingCommitmentsIfNecessary(serviceType db.ServiceType, resourceName liquid.ResourceName) error {
behavior := c.Cluster.BehaviorForResource(serviceType, resourceName)
resInfo := c.Cluster.InfoForResource(serviceType, resourceName)
now := c.MeasureTime()

// do not run ConfirmPendingCommitments if commitments are not enabled (or not live yet) for this resource
Expand All @@ -378,7 +379,7 @@ func (c *Collector) confirmPendingCommitmentsIfNecessary(serviceType db.ServiceT
defer sqlext.RollbackUnlessCommitted(tx)

committableAZs := c.Cluster.Config.AvailabilityZones
if !behavior.CommitmentIsAZAware {
if resInfo.Topology == liquid.FlatResourceTopology {
committableAZs = []liquid.AvailabilityZone{liquid.AvailabilityZoneAny}
}
for _, az := range committableAZs {
Expand Down
2 changes: 1 addition & 1 deletion internal/collector/capacity_scrape_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ const (
- second/things
resource_behavior:
# enable commitments for the */capacity resources
- { resource: '.*/capacity', commitment_durations: [ '1 hour', '10 days' ], commitment_is_az_aware: true }
- { resource: '.*/capacity', commitment_durations: [ '1 hour', '10 days' ] }
# test that overcommit factor is considered when confirming commitments
- { resource: first/capacity, overcommit_factor: 10.0 }
quota_distribution_configs:
Expand Down
2 changes: 1 addition & 1 deletion internal/collector/commitment_cleanup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ const (
type: --test-generic
resource_behavior:
# enable commitments for the */capacity resources
- { resource: '.*/capacity', commitment_durations: [ '1 day', '3 years' ], commitment_is_az_aware: true }
- { resource: '.*/capacity', commitment_durations: [ '1 day', '3 years' ] }
`
)

Expand Down
32 changes: 26 additions & 6 deletions internal/collector/scrape.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,17 +208,29 @@ func (c *Collector) writeResourceScrapeResult(dbDomain db.Domain, dbProject db.P
srv := task.Service

for resName, resData := range resourceData {
resInfo := c.Cluster.InfoForResource(task.Service.Type, resName)
if len(resData.UsageData) == 0 {
// ensure that there is at least one ProjectAZResource for each ProjectResource
resData.UsageData = core.InAnyAZ(core.UsageData{Usage: 0})
resourceData[resName] = resData
} else {
// for AZ-aware resources, ensure that we also have a ProjectAZResource in
// "any", because ApplyComputedProjectQuota needs somewhere to write base
// quotas into if enabled
_, exists := resData.UsageData[liquid.AvailabilityZoneAny]
if !exists {
resData.UsageData[liquid.AvailabilityZoneAny] = &core.UsageData{Usage: 0}
// AZ-separated resources do not get an entry in the "any" AZ; their base quota is distributed among the real AZs instead.
// If a configured AZ is missing from the scrape response, an empty entry is created for it so that the base quota can be stored there.
if resInfo.Topology == liquid.AZSeparatedResourceTopology {
for _, availabilityZone := range c.Cluster.Config.AvailabilityZones {
_, exists := resData.UsageData[availabilityZone]
if !exists {
resData.UsageData[availabilityZone] = &core.UsageData{Usage: 0}
}
}
} else {
// for AZ-aware resources, ensure that we also have a ProjectAZResource in
// "any", because ApplyComputedProjectQuota needs somewhere to write base
// quotas into if enabled
_, exists := resData.UsageData[liquid.AvailabilityZoneAny]
if !exists {
resData.UsageData[liquid.AvailabilityZoneAny] = &core.UsageData{Usage: 0}
}
}
}
}
Expand Down Expand Up @@ -303,6 +315,14 @@ func (c *Collector) writeResourceScrapeResult(dbDomain db.Domain, dbProject db.P
azRes.Usage = data.Usage
azRes.PhysicalUsage = data.PhysicalUsage

// Set the per-AZ backend quota: it is only tracked for AZ-separated resources that have quota, and cleared otherwise.
resInfo := c.Cluster.InfoForResource(srv.Type, res.Name)
if resInfo.Topology == liquid.AZSeparatedResourceTopology && resInfo.HasQuota {
azRes.BackendQuota = data.Quota
} else {
azRes.BackendQuota = nil
}

// warn when the backend is inconsistent with itself
if data.Subresources != nil && uint64(len(data.Subresources)) != data.Usage {
logg.Info("resource quantity mismatch in project %s, resource %s/%s, AZ %s: usage = %d, but found %d subresources",
Expand Down
Loading

0 comments on commit 072f27e

Please sign in to comment.