From ce060c880688bc2f782cc2fa2a6f887df54907f0 Mon Sep 17 00:00:00 2001
From: Varsius <42544122+Varsius@users.noreply.github.com>
Date: Mon, 25 Nov 2024 15:09:53 +0100
Subject: [PATCH] WIP: Add liquid nova report usage endpoint

---
 internal/liquids/nova/liquid.go       |  95 +++++++++-
 internal/liquids/nova/subresources.go | 139 ++++++++++++++
 internal/liquids/nova/usage.go        | 250 ++++++++++++++++++++++++++
 main.go                               |   1 +
 4 files changed, 477 insertions(+), 8 deletions(-)
 create mode 100644 internal/liquids/nova/subresources.go
 create mode 100644 internal/liquids/nova/usage.go

diff --git a/internal/liquids/nova/liquid.go b/internal/liquids/nova/liquid.go
index 1980c508..e19a14a0 100644
--- a/internal/liquids/nova/liquid.go
+++ b/internal/liquids/nova/liquid.go
@@ -22,6 +22,9 @@ package nova
 import (
 	"context"
 	"errors"
+	"fmt"
+	"regexp"
+	"slices"
 	"time"
 
 	"github.com/gophercloud/gophercloud/v2"
@@ -31,7 +34,16 @@ import (
 )
 
 type Logic struct {
-	NovaV2 *gophercloud.ServiceClient `yaml:"-"`
+	// configuration (the only fields that carry a real YAML key)
+	HypervisorType   string `yaml:"hypervisor_type"`
+	WithSubresources bool   `yaml:"with_subresources"`
+	// connections (excluded from YAML; set up in Init)
+	NovaV2            *gophercloud.ServiceClient `yaml:"-"`
+	OSTypeProber      *OSTypeProber              `yaml:"-"`
+	ServerGroupProber *ServerGroupProber         `yaml:"-"`
+	// computed state (excluded from YAML; filled in Init)
+	ignoredFlavorNames []string                                `yaml:"-"`
+	hasPooledResource  map[string]map[liquid.ResourceName]bool `yaml:"-"`
 }
 
 // Init implements the liquidapi.Logic interface.
@@ -41,8 +53,63 @@ func (l *Logic) Init(ctx context.Context, provider *gophercloud.ProviderClient,
 		return err
 	}
 	l.NovaV2.Microversion = "2.61" // to include extra specs in flavors.ListDetail()
+	cinderV3, err := openstack.NewBlockStorageV3(provider, eo)
+	if err != nil {
+		return err
+	}
+	glanceV2, err := openstack.NewImageV2(provider, eo)
+	if err != nil {
+		return err
+	}
+	l.OSTypeProber = NewOSTypeProber(l.NovaV2, cinderV3, glanceV2)
+	l.ServerGroupProber = NewServerGroupProber(l.NovaV2)
+
+	// SAPCC extension: Nova may report quotas with this name pattern in its quota sets and quota class sets.
+	// If it does, instances with flavors that have the extra spec `quota:hw_version` set to the first match
+	// group of this regexp will count towards those quotas instead of the regular `cores/instances/ram` quotas.
+	//
+	// This initialization enumerates which such pooled resources exist.
+	defaultQuotaClassSet, err := getDefaultQuotaClassSet(ctx, l.NovaV2)
+	if err != nil {
+		return fmt.Errorf("while enumerating default quotas: %w", err)
+	}
+	l.hasPooledResource = make(map[string]map[liquid.ResourceName]bool)
+	hwVersionResourceRx := regexp.MustCompile(`^hw_version_(\S+)_(cores|instances|ram)$`)
+	for resourceName := range defaultQuotaClassSet {
+		match := hwVersionResourceRx.FindStringSubmatch(resourceName)
+		if match == nil {
+			continue
+		}
+		hwVersion, baseResourceName := match[1], liquid.ResourceName(match[2])
 
-	return nil
+		if l.hasPooledResource[hwVersion] == nil {
+			l.hasPooledResource[hwVersion] = make(map[liquid.ResourceName]bool)
+		}
+		l.hasPooledResource[hwVersion][baseResourceName] = true
+	}
+
+	return FlavorSelection{}.ForeachFlavor(ctx, l.NovaV2, func(f flavors.Flavor) error {
+		if IsIronicFlavor(f) {
+			l.ignoredFlavorNames = append(l.ignoredFlavorNames, f.Name) // consulted later through IgnoreFlavor
+		}
+		return nil
+	})
+}
+
+func getDefaultQuotaClassSet(ctx context.Context, novaV2 *gophercloud.ServiceClient) (map[string]any, error) {
+	url := novaV2.ServiceURL("os-quota-class-sets", "default")
+	var result gophercloud.Result
+	_, err := novaV2.Get(ctx, url, &result.Body, nil) //nolint:bodyclose
+	if err != nil {
+		return nil, err
+	}
+
+	var body struct {
+		// NOTE: cannot use map[string]int64 here because this object also contains the string field "id": "default"
+		QuotaClassSet map[string]any `json:"quota_class_set"`
+	}
+	err = result.ExtractInto(&body)
+	return body.QuotaClassSet, err
 }
 
 // BuildServiceInfo implements the liquidapi.Logic interface.
@@ -74,7 +141,7 @@ func (l *Logic) BuildServiceInfo(ctx context.Context) (liquid.ServiceInfo, error
 	}
 
 	err := FlavorSelection{}.ForeachFlavor(ctx, l.NovaV2, func(f flavors.Flavor) error {
-		if f.ExtraSpecs["capabilities:hypervisor_type"] == "ironic" {
+		if IsIronicFlavor(f) {
 			return nil
 		}
 		if f.ExtraSpecs["quota:separate"] == "true" {
@@ -93,6 +160,18 @@ func (l *Logic) BuildServiceInfo(ctx context.Context) (liquid.ServiceInfo, error
 	return liquid.ServiceInfo{
 		Version:   time.Now().Unix(),
 		Resources: resources,
+		UsageMetricFamilies: map[liquid.MetricName]liquid.MetricFamilyInfo{
+			"liquid_nova_instance_counts_by_hypervisor": {
+				Type:      liquid.MetricTypeGauge,                                   // instance counts can go down as well as up, so this is a gauge and not a counter
+				Help:      "Total number of instances, grouped by hypervisor type.", // TODO: Is this correct? Liquid nova only has one hypervisor type if I understand correctly
+				LabelKeys: []string{"hypervisor_type"},
+			},
+			"liquid_nova_instance_counts_bvy_hypervisor_and_az": {
+				Type:      liquid.MetricTypeGauge, // gauge for the same reason; NOTE: ScanUsage emits this exact name (including the "bvy" typo), so rename both together
+				Help:      "Total number of instances in each availability zone, grouped by hypervisor type.",
+				LabelKeys: []string{"hypervisor_type", "availability_zone"},
+			},
+		},
 	}, nil
 }
 
@@ -101,12 +180,12 @@ func (l *Logic) ScanCapacity(ctx context.Context, req liquid.ServiceCapacityRequ
 	return liquid.ServiceCapacityReport{}, errors.New("TODO")
 }
 
-// ScanUsage implements the liquidapi.Logic interface.
-func (l *Logic) ScanUsage(ctx context.Context, projectUUID string, req liquid.ServiceUsageRequest, serviceInfo liquid.ServiceInfo) (liquid.ServiceUsageReport, error) {
-	return liquid.ServiceUsageReport{}, errors.New("TODO")
-}
-
 // SetQuota implements the liquidapi.Logic interface.
 func (l *Logic) SetQuota(ctx context.Context, projectUUID string, req liquid.ServiceQuotaRequest, serviceInfo liquid.ServiceInfo) error {
 	return errors.New("TODO")
 }
+
+// IgnoreFlavor reports whether the flavor with this name was recorded as an Ironic flavor during Init.
+func (l *Logic) IgnoreFlavor(flavorName string) bool {
+	return slices.Contains(l.ignoredFlavorNames, flavorName)
+}
diff --git a/internal/liquids/nova/subresources.go b/internal/liquids/nova/subresources.go
new file mode 100644
index 000000000..b525ab1a
--- /dev/null
+++ b/internal/liquids/nova/subresources.go
@@ -0,0 +1,139 @@
+/*******************************************************************************
+*
+* Copyright 2024 SAP SE
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You should have received a copy of the License along with this
+* program. If not, you may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*
+*******************************************************************************/
+
+package nova
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strconv"
+
+	"github.com/gophercloud/gophercloud/v2"
+	"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/servers"
+	"github.com/gophercloud/gophercloud/v2/pagination"
+	"github.com/sapcc/go-api-declarations/liquid"
+)
+
+type SubresourceAttributes struct {
+	// base metadata (copied from the Nova server object)
+	Status   string            `json:"status"`
+	Metadata map[string]string `json:"metadata"`
+	Tags     []string          `json:"tags"`
+	// placement information
+	AZ             liquid.AvailabilityZone `json:"availability_zone"`
+	HypervisorType string                  `json:"hypervisor,omitempty"`
+	// information from the flavor data embedded in the server object
+	FlavorName     string  `json:"flavor"`
+	VCPUs          uint64  `json:"vcpu"`
+	MemoryMiB      uint64  `json:"ram"`
+	DiskGiB        uint64  `json:"disk"`
+	VideoMemoryMiB *uint64 `json:"video_ram,omitempty"`
+	HWVersion      string  `json:"-"` // this is only used for sorting the subresource into the right resource
+	// information from image (resolved through the OSTypeProber)
+	OSType string `json:"os_type"`
+}
+
+func (l *Logic) buildInstanceSubresource(ctx context.Context, instance servers.Server) (res liquid.Subresource, err error) {
+	// copy base attributes
+	res.ID = instance.ID
+	res.Name = instance.Name
+
+	attrs := SubresourceAttributes{
+		Status:   instance.Status,
+		AZ:       liquid.AvailabilityZone(instance.AvailabilityZone),
+		Metadata: instance.Metadata,
+	}
+	if instance.Tags != nil {
+		attrs.Tags = *instance.Tags
+	}
+
+	// flavor data is given to us as a map[string]any, but we want something more structured, so round-trip it through JSON
+	buf, err := json.Marshal(instance.Flavor)
+	if err != nil {
+		return res, fmt.Errorf("could not reserialize flavor data for instance %s: %w", instance.ID, err)
+	}
+	var flavorInfo FlavorInfo
+	err = json.Unmarshal(buf, &flavorInfo)
+	if err != nil {
+		return res, fmt.Errorf("could not parse flavor data for instance %s: %w", instance.ID, err)
+	}
+
+	// copy attributes from flavor data
+	attrs.FlavorName = flavorInfo.OriginalName
+	attrs.VCPUs = flavorInfo.VCPUs
+	attrs.MemoryMiB = flavorInfo.MemoryMiB
+	attrs.DiskGiB = flavorInfo.DiskGiB
+	if videoRAMStr, exists := flavorInfo.ExtraSpecs["hw_video:ram_max_mb"]; exists {
+		videoRAMVal, err := strconv.ParseUint(videoRAMStr, 10, 64) // this err shadows the named result and is never returned
+		if err == nil { // NOTE: unparseable values are silently skipped, leaving VideoMemoryMiB nil
+			attrs.VideoMemoryMiB = &videoRAMVal
+		}
+	}
+	attrs.HWVersion = flavorInfo.ExtraSpecs["quota:hw_version"]
+
+	// calculate classifications based on flavor data (NOTE: deprecated, only here for backwards compatibility)
+	attrs.HypervisorType = l.HypervisorType
+
+	// calculate classifications based on image data
+	attrs.OSType = l.OSTypeProber.Get(ctx, instance)
+
+	buf, err = json.Marshal(attrs)
+	if err != nil {
+		return res, fmt.Errorf("while serializing Subresource Attributes: %w", err)
+	}
+	res.Attributes = json.RawMessage(buf)
+	return res, nil
+}
+
+func (l *Logic) buildInstanceSubresources(ctx context.Context, projectUUID string) ([]liquid.Subresource, error) {
+	opts := novaServerListOpts{
+		AllTenants: true,
+		TenantID:   projectUUID,
+	}
+
+	var result []liquid.Subresource
+	err := servers.List(l.NovaV2, opts).EachPage(ctx, func(ctx context.Context, page pagination.Page) (bool, error) {
+		var instances []servers.Server
+		err := servers.ExtractServersInto(page, &instances)
+		if err != nil {
+			return false, err
+		}
+
+		for _, instance := range instances {
+			res, err := l.buildInstanceSubresource(ctx, instance)
+			if err != nil {
+				return false, err // returning false stops the pagination
+			}
+			result = append(result, res)
+		}
+		return true, nil
+	})
+	return result, err
+}
+
+type novaServerListOpts struct { // passed to servers.List() in buildInstanceSubresources
+	AllTenants bool   `q:"all_tenants"`
+	TenantID   string `q:"tenant_id"`
+}
+
+func (opts novaServerListOpts) ToServerListQuery() (string, error) {
+	q, err := gophercloud.BuildQueryString(opts)
+	return q.String(), err
+}
diff --git a/internal/liquids/nova/usage.go b/internal/liquids/nova/usage.go
new file mode 100644
index 000000000..65555e9c
--- /dev/null
+++ b/internal/liquids/nova/usage.go
@@ -0,0 +1,250 @@
+/*******************************************************************************
+*
+* Copyright 2024 SAP SE
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You should have received a copy of the License along with this
+* program. If not, you may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*
+*******************************************************************************/
+
+package nova
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/limits"
+	"github.com/sapcc/go-api-declarations/liquid"
+)
+
+// UnknownAZResourceUsageReports is a convenience constructor for PerAZ that puts all data in the "unknown" AZ.
+// Adds zero-valued entries for every AZ in allAZs; the "unknown" entry is only set when data is non-nil.
+// Use this for data relating to AZ-aware resources where the AZ association is unknown.
+func UnknownAZResourceUsageReports(data *liquid.AZResourceUsageReport, allAZs []liquid.AvailabilityZone) map[liquid.AvailabilityZone]*liquid.AZResourceUsageReport {
+	usageReports := make(map[liquid.AvailabilityZone]*liquid.AZResourceUsageReport)
+	for _, az := range allAZs {
+		usageReports[az] = &liquid.AZResourceUsageReport{}
+	}
+	if data != nil {
+		usageReports[liquid.AvailabilityZoneUnknown] = data
+	}
+
+	return usageReports
+}
+
+// UsageInAZ is like `r.PerAZ[az]`, but inserts a new zero-valued AZResourceUsageReport on first access.
+// This is useful when calculating AZ-aware usage by iterating through a list of AZ-localized objects.
+func UsageInAZ(r *liquid.ResourceUsageReport, az liquid.AvailabilityZone) *liquid.AZResourceUsageReport {
+	if r.PerAZ == nil {
+		panic("UsageInAZ cannot operate on a nil PerAZ")
+	}
+	entry := r.PerAZ[az]
+	if entry == nil {
+		entry = &liquid.AZResourceUsageReport{}
+		r.PerAZ[az] = entry
+	}
+	return entry
+}
+
+// pooledResourceName maps base ("cores", "instances" or "ram") to its hw_version_* variant if such a pooled resource exists for hwVersion.
+func (l *Logic) pooledResourceName(hwVersion string, base liquid.ResourceName) liquid.ResourceName {
+	if hwVersion == "" {
+		return base
+	}
+
+	// if we saw a "quota:hw_version" extra spec on the instance's flavor, use the appropriate resource if it exists
+	if l.hasPooledResource[hwVersion][base] {
+		return liquid.ResourceName(fmt.Sprintf("hw_version_%s_%s", hwVersion, base))
+	}
+	return base
+}
+
+// ScanUsage implements the liquidapi.Logic interface.
+func (l *Logic) ScanUsage(ctx context.Context, projectUUID string, req liquid.ServiceUsageRequest, serviceInfo liquid.ServiceInfo) (liquid.ServiceUsageReport, error) {
+	var limitsData struct {
+		Limits struct {
+			Absolute struct {
+				MaxTotalCores         int64  `json:"maxTotalCores"`
+				MaxTotalInstances     int64  `json:"maxTotalInstances"`
+				MaxTotalRAMSize       int64  `json:"maxTotalRAMSize"`
+				MaxServerGroups       int64  `json:"maxServerGroups"`
+				MaxServerGroupMembers int64  `json:"maxServerGroupMembers"`
+				TotalCoresUsed        uint64 `json:"totalCoresUsed"`
+				TotalInstancesUsed    uint64 `json:"totalInstancesUsed"`
+				TotalRAMUsed          uint64 `json:"totalRAMUsed"`
+				TotalServerGroupsUsed uint64 `json:"totalServerGroupsUsed"`
+			} `json:"absolute"`
+			AbsolutePerFlavor map[string]struct {
+				MaxTotalInstances  int64  `json:"maxTotalInstances"`
+				TotalInstancesUsed uint64 `json:"totalInstancesUsed"`
+			} `json:"absolutePerFlavor"`
+			AbsolutePerHWVersion map[string]struct {
+				MaxTotalCores      int64  `json:"maxTotalCores"`
+				MaxTotalInstances  int64  `json:"maxTotalInstances"`
+				MaxTotalRAMSize    int64  `json:"maxTotalRAMSize"`
+				TotalCoresUsed     uint64 `json:"totalCoresUsed"`
+				TotalInstancesUsed uint64 `json:"totalInstancesUsed"`
+				TotalRAMUsed       uint64 `json:"totalRAMUsed"`
+			} `json:"absolutePerHwVersion"`
+		} `json:"limits"`
+	}
+	err := limits.Get(ctx, l.NovaV2, limits.GetOpts{TenantID: projectUUID}).ExtractInto(&limitsData)
+	if err != nil {
+		return liquid.ServiceUsageReport{}, err
+	}
+	absoluteLimits := limitsData.Limits.Absolute
+	var totalServerGroupMembersUsed uint64
+	if absoluteLimits.TotalServerGroupsUsed > 0 { // without server groups there cannot be members, so skip the extra query
+		totalServerGroupMembersUsed, err = l.ServerGroupProber.GetMemberUsageForProject(ctx, projectUUID)
+		if err != nil {
+			return liquid.ServiceUsageReport{}, err
+		}
+	}
+
+	// initialize `Resources` with the totals reported by Nova, filed under the "unknown" AZ
+	resources := map[liquid.ResourceName]*liquid.ResourceUsageReport{
+		"cores": {
+			Quota: &absoluteLimits.MaxTotalCores,
+			PerAZ: UnknownAZResourceUsageReports(&liquid.AZResourceUsageReport{Usage: absoluteLimits.TotalCoresUsed}, req.AllAZs),
+		},
+		"instances": {
+			Quota: &absoluteLimits.MaxTotalInstances,
+			PerAZ: UnknownAZResourceUsageReports(&liquid.AZResourceUsageReport{Usage: absoluteLimits.TotalInstancesUsed}, req.AllAZs),
+		},
+		"ram": {
+			Quota: &absoluteLimits.MaxTotalRAMSize,
+			PerAZ: UnknownAZResourceUsageReports(&liquid.AZResourceUsageReport{Usage: absoluteLimits.TotalRAMUsed}, req.AllAZs),
+		},
+		"server_groups": {
+			Quota: &absoluteLimits.MaxServerGroups,
+			PerAZ: UnknownAZResourceUsageReports(&liquid.AZResourceUsageReport{Usage: absoluteLimits.TotalServerGroupsUsed}, req.AllAZs),
+		},
+		"server_group_members": {
+			Quota: &absoluteLimits.MaxServerGroupMembers,
+			PerAZ: UnknownAZResourceUsageReports(&liquid.AZResourceUsageReport{Usage: totalServerGroupMembersUsed}, req.AllAZs),
+		},
+	}
+	for flavorName, flavorLimits := range limitsData.Limits.AbsolutePerFlavor {
+		if l.IgnoreFlavor(flavorName) {
+			continue
+		}
+		resourceName := ResourceNameForFlavor(flavorName)
+		resources[resourceName] = &liquid.ResourceUsageReport{
+			Quota: &flavorLimits.MaxTotalInstances,
+			PerAZ: UnknownAZResourceUsageReports(&liquid.AZResourceUsageReport{Usage: flavorLimits.TotalInstancesUsed}, req.AllAZs),
+		}
+	}
+	for hwVersion, hwLimits := range limitsData.Limits.AbsolutePerHWVersion { // not named `limits` to avoid shadowing the imported package
+		if l.hasPooledResource[hwVersion]["cores"] {
+			resources[l.pooledResourceName(hwVersion, "cores")] = &liquid.ResourceUsageReport{
+				Quota: &hwLimits.MaxTotalCores,
+				PerAZ: UnknownAZResourceUsageReports(&liquid.AZResourceUsageReport{Usage: hwLimits.TotalCoresUsed}, req.AllAZs),
+			}
+		}
+		if l.hasPooledResource[hwVersion]["instances"] {
+			resources[l.pooledResourceName(hwVersion, "instances")] = &liquid.ResourceUsageReport{
+				Quota: &hwLimits.MaxTotalInstances,
+				PerAZ: UnknownAZResourceUsageReports(&liquid.AZResourceUsageReport{Usage: hwLimits.TotalInstancesUsed}, req.AllAZs),
+			}
+		}
+		if l.hasPooledResource[hwVersion]["ram"] {
+			resources[l.pooledResourceName(hwVersion, "ram")] = &liquid.ResourceUsageReport{
+				Quota: &hwLimits.MaxTotalRAMSize,
+				PerAZ: UnknownAZResourceUsageReports(&liquid.AZResourceUsageReport{Usage: hwLimits.TotalRAMUsed}, req.AllAZs),
+			}
+		}
+	}
+
+	// Nova does not have a native API for AZ-aware usage reporting, so we obtain AZ-aware usage stats
+	// by counting up all subresources, even if we don't end up showing them in the API
+	allSubresources, err := l.buildInstanceSubresources(ctx, projectUUID)
+	if err != nil {
+		return liquid.ServiceUsageReport{}, fmt.Errorf("while collecting instance data: %w", err)
+	}
+
+	for _, subres := range allSubresources {
+		var attrs SubresourceAttributes
+		if err = json.Unmarshal(subres.Attributes, &attrs); err != nil { // TODO: Unmarshalling here + marshalling when building the subresources seems inefficient
+			return liquid.ServiceUsageReport{}, err
+		}
+
+		az := attrs.AZ
+
+		if l.IgnoreFlavor(attrs.FlavorName) {
+			continue
+		}
+
+		// use separate instance resource if we have a matching "instances_$FLAVOR" resource
+		instanceResourceName := ResourceNameForFlavor(attrs.FlavorName)
+		isPooled := false
+		if _, exists := resources[instanceResourceName]; !exists {
+			// otherwise use the appropriate pooled instance resource
+			isPooled = true
+			instanceResourceName = l.pooledResourceName(attrs.HWVersion, "instances")
+		}
+
+		// count subresource towards "instances" (or separate instance resource)
+		resources[instanceResourceName].AddLocalizedUsage(az, 1)
+		if l.WithSubresources {
+			azData := UsageInAZ(resources[instanceResourceName], az)
+			azData.Subresources = append(azData.Subresources, subres)
+		}
+
+		// if counted towards separate instance resource, do not count towards "cores" and "ram"
+		if !isPooled {
+			continue
+		}
+
+		// count towards "cores" and "ram" under the appropriate pooled resource
+		resources[l.pooledResourceName(attrs.HWVersion, "cores")].AddLocalizedUsage(az, attrs.VCPUs)
+		resources[l.pooledResourceName(attrs.HWVersion, "ram")].AddLocalizedUsage(az, attrs.MemoryMiB)
+	}
+
+	// calculate metrics (only when a hypervisor type is configured; otherwise `metrics` stays nil)
+	var metrics map[liquid.MetricName][]liquid.Metric
+	if l.HypervisorType != "" {
+		countsByAZ := map[liquid.AvailabilityZone]uint64{liquid.AvailabilityZoneUnknown: 0}
+		for _, subres := range allSubresources {
+			var attrs SubresourceAttributes
+			if err = json.Unmarshal(subres.Attributes, &attrs); err != nil { // TODO: Again unmarshalling
+				return liquid.ServiceUsageReport{}, err
+			}
+			countsByAZ[attrs.AZ]++
+		}
+
+		countsByAZMetric := make([]liquid.Metric, len(countsByAZ))
+		idx := 0
+		for az, count := range countsByAZ {
+			countsByAZMetric[idx] = liquid.Metric{
+				Value:       float64(count),
+				LabelValues: []string{l.HypervisorType, string(az)},
+			}
+			idx++
+		}
+
+		metrics = map[liquid.MetricName][]liquid.Metric{
+			"liquid_nova_instance_counts_by_hypervisor": {{
+				Value:       float64(len(allSubresources)),
+				LabelValues: []string{l.HypervisorType},
+			}},
+			"liquid_nova_instance_counts_bvy_hypervisor_and_az": countsByAZMetric, // NOTE: "bvy" typo must match the name declared in BuildServiceInfo; rename both together
+		}
+	}
+
+	return liquid.ServiceUsageReport{
+		InfoVersion: serviceInfo.Version,
+		Resources:   resources,
+		Metrics:     metrics,
+	}, nil
+}
diff --git a/main.go b/main.go
index 03a9edb1..0dcd50a5 100644
--- a/main.go
+++ b/main.go
@@ -122,6 +122,7 @@ func main() {
 	case "neutron":
 		must.Succeed(liquidapi.Run(ctx, &neutron.Logic{}, opts))
 	case "nova":
+		opts.TakesConfiguration = true
 		must.Succeed(liquidapi.Run(ctx, &nova.Logic{}, opts))
 	case "octavia":
 		must.Succeed(liquidapi.Run(ctx, &octavia.Logic{}, opts))