Skip to content

Commit

Permalink
Configurable workload node selectors and tolerations (Fixes aws-obser…
Browse files Browse the repository at this point in the history
  • Loading branch information
Charles Bushong committed May 29, 2024
1 parent 18ebaf4 commit e907bef
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ spec:
mode: daemonset
nodeSelector:
kubernetes.io/os: linux
{{- /*
Append user-supplied labels to the nodeSelector map opened above.
Reads agent.additionalNodeSelector, the key declared in values.yaml, and
falls back to the previously read agent.nodeSelector key when it is empty.
*/}}
{{- with (.Values.agent.additionalNodeSelector | default .Values.agent.nodeSelector) }}
{{- toYaml . | nindent 4 }}
{{- end }}
serviceAccount: {{ template "cloudwatch-agent.serviceAccountName" . }}
{{- if .Values.agent.config }}
config: {{ include "cloudwatch-agent.modify-config" (merge (dict "Config" .Values.agent.config) . ) }}
Expand Down Expand Up @@ -111,7 +114,7 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
{{- with .Values.tolerations }}
{{- with (.Values.agent.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 2}}
{{- end }}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ spec:
image: {{ template "dcgm-exporter.image" . }}
nodeSelector:
kubernetes.io/os: linux
{{- with .Values.dcgmExporter.additionalNodeSelector }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- with (.Values.dcgmExporter.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 2}}
{{- end }}
serviceAccount: {{ template "dcgm-exporter.serviceAccountName" . }}
affinity:
nodeAffinity:
Expand Down Expand Up @@ -71,6 +77,3 @@ spec:
tls_server_config:
cert_file: /etc/amazon-cloudwatch-observability-dcgm-cert/server.crt
key_file: /etc/amazon-cloudwatch-observability-dcgm-cert/server.key
{{- with .Values.tolerations }}
tolerations: {{- toYaml . | nindent 2}}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ spec:
serviceAccountName: {{ template "cloudwatch-agent.serviceAccountName" . }}
nodeSelector:
kubernetes.io/os: linux
{{- with .Values.tolerations }}
{{- with .Values.fluentBit.additionalNodeSelector }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with (.Values.fluentBit.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 6}}
{{- end }}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ spec:
- key: {{ .Values.nodeLabelKey }}
operator: In
values: {{ .Values.neuronInstances | toYaml | nindent 20 }}
{{- /* Render neuronMonitor.additionalNodeAffinityTerms, when set, at indent 10. */}}
{{- with .Values.neuronMonitor.additionalNodeAffinityTerms }}
{{- toYaml . | nindent 10 }}
{{- end }}
resources:
limits:
cpu: 500m
Expand Down Expand Up @@ -91,6 +94,6 @@ spec:
}
]
}
{{- with .Values.tolerations }}
{{- with (.Values.neuronMonitor.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 2}}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,6 @@ spec:
secretName: {{ template "amazon-cloudwatch-observability.certificateSecretName" . }}
nodeSelector:
kubernetes.io/os: linux
{{- /*
Append manager.additionalNodeSelector (declared in values.yaml) to the map above.
NOTE(review): indent 8 assumes node selector entries sit two columns deeper
than the tolerations key rendered at indent 6 below; confirm against the full template.
*/}}
{{- with .Values.manager.additionalNodeSelector }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
{{- with (.Values.manager.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 6}}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ spec:
serviceAccount: {{ template "cloudwatch-agent.serviceAccountName" . }}
nodeSelector:
kubernetes.io/os: windows
{{- with .Values.agent.additionalNodeSelector }}
{{- toYaml . | nindent 4 }}
{{- end }}
config: {{ .Values.agent.windowsDefaultConfig | toJson | quote }}
resources:
requests:
Expand Down Expand Up @@ -47,7 +50,7 @@ spec:
value: "True"
- name: RUN_AS_HOST_PROCESS_CONTAINER
value: "True"
{{- with .Values.tolerations }}
{{- with (.Values.agent.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 2}}
{{- end }}
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ spec:
hostNetwork: true
nodeSelector:
kubernetes.io/os: windows
{{- with .Values.fluentBit.additionalNodeSelector }}
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- name: fluent-bit
image: {{ template "fluent-bit-windows.image" . }}
Expand Down Expand Up @@ -70,7 +73,7 @@ spec:
terminationGracePeriodSeconds: 10
dnsPolicy: ClusterFirstWithHostNet
serviceAccountName: {{ template "cloudwatch-agent.serviceAccountName" . }}
{{- with .Values.tolerations }}
{{- with (.Values.fluentBit.tolerations | default .Values.tolerations ) }}
tolerations: {{- toYaml . | nindent 6}}
{{- end }}
{{- end }}
{{- end }}
14 changes: 13 additions & 1 deletion charts/amazon-cloudwatch-observability/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ neuronInstances: [ trn1.2xlarge, trn1.32xlarge, trn1n.32xlarge, inf1.xlarge, inf
tolerations:
- operator: Exists

fluentBit:
tolerations: [] # Fluent Bit tolerations; when empty, the top-level `tolerations` value is used
additionalNodeSelector: {} # Additional node selector key-value pairs

containerLogs:
enabled: true
fluentBit:
Expand Down Expand Up @@ -423,6 +427,8 @@ manager:

service:
name:
tolerations: [] # Manager tolerations; when empty, the top-level `tolerations` value is used
additionalNodeSelector: {} # Additional node selector key-value pairs

## Admission webhooks make sure only requests with correctly formatted rules will get into the Operator.
admissionWebhooks:
Expand Down Expand Up @@ -532,6 +538,8 @@ agent:
}
}
}
tolerations: [] # Agent tolerations; when empty, the top-level `tolerations` value is used
additionalNodeSelector: {} # Additional node selector key-value pairs

dcgmExporter:
name:
Expand All @@ -554,6 +562,8 @@ dcgmExporter:
kubeletPath: "/var/lib/kubelet/pod-resources"
serviceAccount:
name: # override exporter service account name
tolerations: [] # DCGM exporter tolerations; when empty, the top-level `tolerations` value is used
additionalNodeSelector: {} # Additional node selector key-value pairs

neuronMonitor:
name:
Expand All @@ -574,4 +584,6 @@ neuronMonitor:
capabilities:
add: ["SYS_ADMIN"]
serviceAccount:
name: # override exporter service account name
name: # override exporter service account name
tolerations: [] # Neuron monitor tolerations; when empty, the top-level `tolerations` value is used
additionalNodeAffinityTerms: [] # Additional required node affinity selector terms

0 comments on commit e907bef

Please sign in to comment.