From 9582504403970b74998881b1b6a1ce1964616506 Mon Sep 17 00:00:00 2001 From: Stefan Majewsky Date: Tue, 17 Dec 2024 11:08:10 +0100 Subject: [PATCH 1/2] avoid backend_quota edit war for AZSeparatedResourceTopology I'm seeing a lot of this in the DB right now: ``` psql> SELECT pr.name, pr.quota, pr.backend_quota, par.az, par.quota, par.usage FROM project_resources pr LEFT OUTER JOIN project_az_resources par ON par.resource_id = pr.id WHERE pr.service_id = ( SELECT id FROM project_services WHERE type = 'ceph' AND quota_desynced_at IS NOT NULL ORDER BY quota_desynced_at LIMIT 1 ); +----------------+--------+---------------+----------+-------+-------+ | name | quota | backend_quota | az | quota | usage | |----------------+--------+---------------+----------+-------+-------| | local-premium | | 0 | qa-de-1a | 0 | 0 | | local-premium | | 0 | qa-de-1b | 0 | 0 | | local-premium | | 0 | qa-de-1d | 0 | 0 | | region-premium | 0 | 0 | any | 0 | 0 | +----------------+--------+---------------+----------+-------+-------+ ``` Scrape wrote a zero into `project_resources.backend_quota`, which caused quota-sync to run and write a null again; the next scrape then wrote the zero back, and so on.
--- internal/collector/scrape.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/collector/scrape.go b/internal/collector/scrape.go index 53495ef6..7157295d 100644 --- a/internal/collector/scrape.go +++ b/internal/collector/scrape.go @@ -257,7 +257,9 @@ func (c *Collector) writeResourceScrapeResult(dbDomain db.Domain, dbProject db.P resInfo := c.Cluster.InfoForResource(srv.Type, res.Name) if resInfo.HasQuota { - res.BackendQuota = &backendQuota + if resInfo.Topology != liquid.AZSeparatedResourceTopology { + res.BackendQuota = &backendQuota + } res.MinQuotaFromBackend = resourceData[res.Name].MinQuota res.MaxQuotaFromBackend = resourceData[res.Name].MaxQuota } @@ -438,6 +440,10 @@ func (c *Collector) writeDummyResources(dbDomain db.Domain, dbProject db.Project } } + // FIXME: These dummy resources do not conform to `resInfo.Topology` and are never AZ-aware. + // I'm not fixing this right now because dummy resources are an extremely rare corner-case anyway. + // TODO: When we rework the DB schema next year, we should build it so that dummy resources can be avoided entirely. 
+ // update scraped_at timestamp and reset stale flag to make sure that we do // not scrape this service again immediately afterwards if there are other // stale services to cover first From bb680403f29d9e313f52a22a27e146fd5fce691c Mon Sep 17 00:00:00 2001 From: Stefan Majewsky Date: Tue, 17 Dec 2024 11:26:37 +0100 Subject: [PATCH 2/2] scrape: never write Quota/BackendQuota != nil for AZSeparatedResourceTopology --- internal/collector/scrape_test.go | 32 +++++++------------ internal/datamodel/project_resource_update.go | 4 +-- internal/test/plugins/quota_generic.go | 8 +++-- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/internal/collector/scrape_test.go b/internal/collector/scrape_test.go index 11300394..10f9a7b3 100644 --- a/internal/collector/scrape_test.go +++ b/internal/collector/scrape_test.go @@ -622,35 +622,29 @@ func Test_TopologyScrapes(t *testing.T) { INSERT INTO project_az_resources (id, resource_id, az, usage, subresources, historical_usage, backend_quota) VALUES (7, 3, 'az-two', 2, '[{"index":2},{"index":3}]', '{"t":[%[1]d],"v":[2]}', 21); INSERT INTO project_az_resources (id, resource_id, az, usage, physical_usage, historical_usage, backend_quota) VALUES (8, 4, 'az-one', 0, 0, '{"t":[%[3]d],"v":[0]}', 50); INSERT INTO project_az_resources (id, resource_id, az, usage, physical_usage, historical_usage, backend_quota) VALUES (9, 4, 'az-two', 0, 0, '{"t":[%[3]d],"v":[0]}', 50); - INSERT INTO project_resources (id, service_id, name, quota, backend_quota) VALUES (1, 1, 'capacity', 0, 100); + INSERT INTO project_resources (id, service_id, name) VALUES (1, 1, 'capacity'); INSERT INTO project_resources (id, service_id, name) VALUES (2, 1, 'capacity_portion'); - INSERT INTO project_resources (id, service_id, name, quota, backend_quota) VALUES (3, 1, 'things', 0, 42); - INSERT INTO project_resources (id, service_id, name, quota, backend_quota) VALUES (4, 2, 'capacity', 0, 100); + INSERT INTO project_resources (id, service_id, name) VALUES (3, 
1, 'things'); + INSERT INTO project_resources (id, service_id, name) VALUES (4, 2, 'capacity'); INSERT INTO project_resources (id, service_id, name) VALUES (5, 2, 'capacity_portion'); - INSERT INTO project_resources (id, service_id, name, quota, backend_quota) VALUES (6, 2, 'things', 0, 42); - UPDATE project_services SET scraped_at = %[1]d, scrape_duration_secs = 5, serialized_metrics = '{"capacity_usage":0,"things_usage":4}', checked_at = %[1]d, next_scrape_at = %[2]d, quota_desynced_at = %[1]d WHERE id = 1 AND project_id = 1 AND type = 'unittest'; - UPDATE project_services SET scraped_at = %[3]d, scrape_duration_secs = 5, serialized_metrics = '{"capacity_usage":0,"things_usage":4}', checked_at = %[3]d, next_scrape_at = %[4]d, quota_desynced_at = %[3]d WHERE id = 2 AND project_id = 2 AND type = 'unittest'; + INSERT INTO project_resources (id, service_id, name) VALUES (6, 2, 'things'); + UPDATE project_services SET scraped_at = %[1]d, scrape_duration_secs = 5, serialized_metrics = '{"capacity_usage":0,"things_usage":4}', checked_at = %[1]d, next_scrape_at = %[2]d WHERE id = 1 AND project_id = 1 AND type = 'unittest'; + UPDATE project_services SET scraped_at = %[3]d, scrape_duration_secs = 5, serialized_metrics = '{"capacity_usage":0,"things_usage":4}', checked_at = %[3]d, next_scrape_at = %[4]d WHERE id = 2 AND project_id = 2 AND type = 'unittest'; `, scrapedAt1.Unix(), scrapedAt1.Add(scrapeInterval).Unix(), scrapedAt2.Unix(), scrapedAt2.Add(scrapeInterval).Unix(), ) // set some quota acpq values. 
- // resource level (ACPQ always writes NULL on this level for AZSeparatedResourceTopology) - _, err := s.DB.Exec(`UPDATE project_resources SET quota = NULL WHERE name = $1`, "capacity") + _, err := s.DB.Exec(`UPDATE project_az_resources SET quota = $1 WHERE resource_id IN (1,4) and az != 'any'`, 20) if err != nil { t.Fatal(err) } - _, err = s.DB.Exec(`UPDATE project_resources SET quota = NULL WHERE name = $1`, "things") - if err != nil { - t.Fatal(err) - } - // az level - _, err = s.DB.Exec(`UPDATE project_az_resources SET quota = $1 WHERE resource_id IN (1,4) and az != 'any'`, 20) + _, err = s.DB.Exec(`UPDATE project_az_resources SET quota = $1 WHERE resource_id IN (3,6) and az != 'any'`, 13) if err != nil { t.Fatal(err) } - _, err = s.DB.Exec(`UPDATE project_az_resources SET quota = $1 WHERE resource_id IN (3,6) and az != 'any'`, 13) + _, err = s.DB.Exec(`UPDATE project_services SET quota_desynced_at = $1`, s.Clock.Now()) if err != nil { t.Fatal(err) } @@ -668,10 +662,6 @@ func Test_TopologyScrapes(t *testing.T) { UPDATE project_az_resources SET backend_quota = 13 WHERE id = 7 AND resource_id = 3 AND az = 'az-two'; UPDATE project_az_resources SET backend_quota = 20 WHERE id = 8 AND resource_id = 4 AND az = 'az-one'; UPDATE project_az_resources SET backend_quota = 20 WHERE id = 9 AND resource_id = 4 AND az = 'az-two'; - UPDATE project_resources SET backend_quota = NULL WHERE id = 1 AND service_id = 1 AND name = 'capacity'; - UPDATE project_resources SET backend_quota = NULL WHERE id = 3 AND service_id = 1 AND name = 'things'; - UPDATE project_resources SET backend_quota = NULL WHERE id = 4 AND service_id = 2 AND name = 'capacity'; - UPDATE project_resources SET backend_quota = NULL WHERE id = 6 AND service_id = 2 AND name = 'things'; UPDATE project_services SET quota_desynced_at = NULL, quota_sync_duration_secs = 5 WHERE id = 1 AND project_id = 1 AND type = 'unittest'; UPDATE project_services SET quota_desynced_at = NULL, quota_sync_duration_secs = 5 WHERE id = 2 
AND project_id = 2 AND type = 'unittest'; `) @@ -696,9 +686,7 @@ func Test_TopologyScrapes(t *testing.T) { UPDATE project_az_resources SET backend_quota = NULL WHERE id = 7 AND resource_id = 3 AND az = 'az-two'; UPDATE project_az_resources SET backend_quota = 50 WHERE id = 8 AND resource_id = 4 AND az = 'az-one'; UPDATE project_az_resources SET backend_quota = 50 WHERE id = 9 AND resource_id = 4 AND az = 'az-two'; - UPDATE project_resources SET quota = 0, backend_quota = 40 WHERE id = 1 AND service_id = 1 AND name = 'capacity'; UPDATE project_resources SET quota = 0, backend_quota = 26 WHERE id = 3 AND service_id = 1 AND name = 'things'; - UPDATE project_resources SET quota = 0, backend_quota = 40 WHERE id = 4 AND service_id = 2 AND name = 'capacity'; UPDATE project_resources SET quota = 0, backend_quota = 26 WHERE id = 6 AND service_id = 2 AND name = 'things'; UPDATE project_services SET scraped_at = %[1]d, checked_at = %[1]d, next_scrape_at = %[2]d, quota_desynced_at = %[1]d WHERE id = 1 AND project_id = 1 AND type = 'unittest'; UPDATE project_services SET scraped_at = %[3]d, checked_at = %[3]d, next_scrape_at = %[4]d, quota_desynced_at = %[3]d WHERE id = 2 AND project_id = 2 AND type = 'unittest'; @@ -724,6 +712,8 @@ func Test_TopologyScrapes(t *testing.T) { DELETE FROM project_az_resources WHERE id = 16 AND resource_id = 6 AND az = 'any'; UPDATE project_az_resources SET backend_quota = 21 WHERE id = 6 AND resource_id = 3 AND az = 'az-one'; UPDATE project_az_resources SET usage = 0, subresources = '', historical_usage = '{"t":[%[1]d,%[3]d],"v":[2,0]}' WHERE id = 7 AND resource_id = 3 AND az = 'az-two'; + UPDATE project_resources SET quota = NULL, backend_quota = NULL WHERE id = 3 AND service_id = 1 AND name = 'things'; + UPDATE project_resources SET quota = NULL, backend_quota = NULL WHERE id = 6 AND service_id = 2 AND name = 'things'; UPDATE project_services SET scraped_at = %[3]d, serialized_metrics = '{"capacity_usage":0,"things_usage":2}', checked_at = 
%[3]d, next_scrape_at = %[4]d WHERE id = 1 AND project_id = 1 AND type = 'unittest'; UPDATE project_services SET scraped_at = %[5]d, serialized_metrics = '{"capacity_usage":0,"things_usage":2}', checked_at = %[5]d, next_scrape_at = %[6]d WHERE id = 2 AND project_id = 2 AND type = 'unittest'; `, diff --git a/internal/datamodel/project_resource_update.go b/internal/datamodel/project_resource_update.go index edf6906a..bb27640e 100644 --- a/internal/datamodel/project_resource_update.go +++ b/internal/datamodel/project_resource_update.go @@ -141,7 +141,7 @@ func (u ProjectResourceUpdate) Run(dbi db.Interface, cluster *core.Cluster, now result = append(result, res) // check if we need to arrange for SetQuotaJob to look at this project service - if resInfo.HasQuota { + if resInfo.HasQuota && resInfo.Topology != liquid.AZSeparatedResourceTopology { backendQuota := unwrapOrDefault(res.BackendQuota, -1) quota := *res.Quota // definitely not nil, it was set above in validateResourceConstraints() if backendQuota < 0 || uint64(backendQuota) != quota { @@ -172,7 +172,7 @@ func unwrapOrDefault[T any](value *T, defaultValue T) T { // Ensures that `res` conforms to various constraints and validation rules. 
func validateResourceConstraints(res *db.ProjectResource, resInfo liquid.ResourceInfo) { - if !resInfo.HasQuota { + if !resInfo.HasQuota || resInfo.Topology == liquid.AZSeparatedResourceTopology { // ensure that NoQuota resources do not contain any quota values res.Quota = nil res.BackendQuota = nil diff --git a/internal/test/plugins/quota_generic.go b/internal/test/plugins/quota_generic.go index 1373b07c..1d0cadbb 100644 --- a/internal/test/plugins/quota_generic.go +++ b/internal/test/plugins/quota_generic.go @@ -195,8 +195,12 @@ func (p *GenericQuotaPlugin) Scrape(ctx context.Context, project core.KeystonePr } // populate azSeparatedQuota - for az, data := range copyOfVal.UsageData { - data.Quota = val.UsageData[az].Quota + topology := p.LiquidServiceInfo.Resources[key].Topology + if topology == liquid.AZSeparatedResourceTopology { + copyOfVal.Quota = 0 + for az, data := range copyOfVal.UsageData { + data.Quota = val.UsageData[az].Quota + } } // test coverage for PhysicalUsage != Usage