sync machine labels
ykakarap committed Jan 23, 2025
1 parent c0cf7c6 commit 3aeb947
Showing 8 changed files with 118 additions and 22 deletions.
4 changes: 4 additions & 0 deletions controllers/alias.go
@@ -18,6 +18,7 @@ package controllers

import (
"context"
"regexp"
"time"

ctrl "sigs.k8s.io/controller-runtime"
@@ -72,6 +73,8 @@ type MachineReconciler struct {
WatchFilterValue string

RemoteConditionsGracePeriod time.Duration

SyncMachineLabels []*regexp.Regexp
}

func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
@@ -81,6 +84,7 @@ func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manag
ClusterCache: r.ClusterCache,
WatchFilterValue: r.WatchFilterValue,
RemoteConditionsGracePeriod: r.RemoteConditionsGracePeriod,
SyncMachineLabels: r.SyncMachineLabels,
}).SetupWithManager(ctx, mgr, options)
}

12 changes: 8 additions & 4 deletions docs/book/src/reference/api/metadata-propagation.md
@@ -63,7 +63,11 @@ Top-level labels that meet specific criteria are propagated to the Node labels
- `.labels.[label-meets-criteria]` => `Node.labels`
- `.annotations` => Not propagated.

Label should meet one of the following criteria to propagate to Node:
- Has `node-role.kubernetes.io` as prefix.
- Belongs to `node-restriction.kubernetes.io` domain.
- Belongs to `node.cluster.x-k8s.io` domain.
The following default set of labels on Machines is always synced to the Nodes:
- `node-role.kubernetes.io`
- `node-restriction.kubernetes.io`
- `*.node-restriction.kubernetes.io`
- `node.cluster.x-k8s.io`
- `*.node.cluster.x-k8s.io`

In addition, any labels that match at least one of the regexes provided by the `--additional-sync-machine-labels` flag on the manager will be synced from the Machine to the Node.
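
As a rough illustration (the regex values below are hypothetical, not defaults shipped with the flag), the matching behaves as in this sketch:

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Hypothetical regexes passed via --additional-sync-machine-labels.
	regexes := []*regexp.Regexp{
		regexp.MustCompile(`^gpu\.example\.com/`),
		regexp.MustCompile(`team-`),
	}

	// A label key is synced if at least one regex matches it.
	for _, key := range []string{"gpu.example.com/model", "team-a/zone", "unrelated"} {
		synced := false
		for _, re := range regexes {
			if re.MatchString(key) {
				synced = true
				break
			}
		}
		fmt.Printf("%s synced=%v\n", key, synced)
	}
}
```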
14 changes: 9 additions & 5 deletions docs/proposals/20220927-label-sync-between-machine-and-nodes.md
@@ -66,8 +66,9 @@ With the "divide and conquer" principle in mind this proposal aims to address th

### Goals

- Support label sync from Machine to the linked Kubernetes node, limited to `node-role.kubernetes.io/` prefix and the `node-restriction.kubernetes.io` domain.
- Support label sync from Machine to the linked Kubernetes node.
- Support syncing labels from Machine to the linked Kubernetes node for the Cluster API owned `node.cluster.x-k8s.io` domain.
- Support configuring which labels are synced from the Machine to the Node.

### Non-Goals

@@ -98,14 +99,16 @@ While designing a solution for syncing labels between Machine and underlying Kub

### Label domains & prefixes

The idea of scoping synchronization to a well defined set of labels is a first answer to security/concurrency concerns; labels to be managed by Cluster API have been selected based on following criteria:
A default list of labels is always synced from Machines to Nodes. An additional set of labels can be synced from the Machine to the Node by providing a list of regexes as a flag to the manager.

- The `node-role.kubernetes.io` label has been used widely in the past to identify the role of a Kubernetes Node (e.g. `node-role.kubernetes.io/worker=''`). For example, `kubectl get node` looks for this specific label when displaying the role to the user.
The following is the default list of label domains that are always synced from Machines to Nodes:

- The `node-restriction.kubernetes.io/` domain is recommended in the [Kubernetes docs](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#noderestriction) for things such as workload placement; please note that in this case
- `node-role.kubernetes.io`: The `node-role.kubernetes.io` label has been used widely in the past to identify the role of a Kubernetes Node (e.g. `node-role.kubernetes.io/worker=''`). For example, `kubectl get node` looks for this specific label when displaying the role to the user.

- `node-restriction.kubernetes.io`, `*.node-restriction.kubernetes.io`: The `node-restriction.kubernetes.io/` domain is recommended in the [Kubernetes docs](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#noderestriction) for things such as workload placement; please note that in this case
we are considering the entire domain, thus also labels like `example.node-restriction.kubernetes.io/fips=true` fall in this category.

- Cluster API owns a specific domain: `node.cluster.x-k8s.io`.
- `node.cluster.x-k8s.io`, `*.node.cluster.x-k8s.io`: Cluster API owns a specific domain: `node.cluster.x-k8s.io`.
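
The matching rule for these domains can be sketched as follows (a minimal illustration; `isDefaultSynced` is a hypothetical helper mirroring the rule, not part of the implementation):

```go
package main

import (
	"fmt"
	"strings"
)

// isDefaultSynced reports whether a label key falls into one of the
// always-synced domains: the part of the key before "/" must equal the
// domain or, for the wildcard entries, end in "."+domain.
func isDefaultSynced(key string) bool {
	prefix := strings.Split(key, "/")[0]
	if prefix == "node-role.kubernetes.io" {
		return true
	}
	for _, d := range []string{"node-restriction.kubernetes.io", "node.cluster.x-k8s.io"} {
		if prefix == d || strings.HasSuffix(prefix, "."+d) {
			return true
		}
	}
	return false
}

func main() {
	fmt.Println(isDefaultSynced("node-role.kubernetes.io/worker"))              // true
	fmt.Println(isDefaultSynced("example.node-restriction.kubernetes.io/fips")) // true
	fmt.Println(isDefaultSynced("gpu-node.cluster.x-k8s.io/model"))             // false: "-" is not a subdomain separator
}
```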

#### Synchronization of CAPI Labels

@@ -163,3 +166,4 @@ Users could also implement their own label synchronizer in their tooling, but th

- [ ] 09/27/2022: First Draft of this document
- [ ] 09/28/2022: First Draft of this document presented in the Cluster API office hours meeting
- [ ] 01/09/2025: Update to support configurable label syncing. Ref: [11657](https://github.com/kubernetes-sigs/cluster-api/issues/11657)
3 changes: 3 additions & 0 deletions internal/controllers/machine/machine_controller.go
@@ -19,6 +19,7 @@ package machine
import (
"context"
"fmt"
"regexp"
"slices"
"strings"
"time"
@@ -99,6 +100,8 @@ type Reconciler struct {

RemoteConditionsGracePeriod time.Duration

SyncMachineLabels []*regexp.Regexp

controller controller.Controller
recorder record.EventRecorder
externalTracker external.ObjectTracker
14 changes: 11 additions & 3 deletions internal/controllers/machine/machine_controller_noderef.go
@@ -127,7 +127,7 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result,
// Compute labels to be propagated from Machines to nodes.
// NOTE: CAPI should manage only a subset of node labels, everything else should be preserved.
// NOTE: Once we reconcile node labels for the first time, the NodeUninitializedTaint is removed from the node.
nodeLabels := getManagedLabels(machine.Labels)
nodeLabels := r.getManagedLabels(machine.Labels)

// Get interruptible instance status from the infrastructure provider and set the interruptible label on the node.
interruptible := false
@@ -178,9 +178,10 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result,

// getManagedLabels gets a map[string]string and returns another map[string]string
// filtering out labels not managed by CAPI.
func getManagedLabels(labels map[string]string) map[string]string {
func (r *Reconciler) getManagedLabels(labels map[string]string) map[string]string {
managedLabels := make(map[string]string)
for key, value := range labels {
// Always sync the default set of labels.
dnsSubdomainOrName := strings.Split(key, "/")[0]
if dnsSubdomainOrName == clusterv1.NodeRoleLabelPrefix {
managedLabels[key] = value
@@ -191,8 +192,15 @@ func getManagedLabels(labels map[string]string) map[string]string {
if dnsSubdomainOrName == clusterv1.ManagedNodeLabelDomain || strings.HasSuffix(dnsSubdomainOrName, "."+clusterv1.ManagedNodeLabelDomain) {
managedLabels[key] = value
}
}

// Sync the label if it matches at least one user-provided regex.
for _, regex := range r.SyncMachineLabels {
if regex.MatchString(key) {
managedLabels[key] = value
break
}
}
}
return managedLabels
}
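
// Illustration (a hedged sketch, not part of this commit): with one extra
// user-provided regex, getManagedLabels keeps keys in the default domains
// plus any key matching the regex.
//
//	r := &Reconciler{SyncMachineLabels: []*regexp.Regexp{regexp.MustCompile(`^team-`)}}
//	got := r.getManagedLabels(map[string]string{
//		"node.cluster.x-k8s.io/role": "gpu", // kept: default managed domain
//		"team-a":                     "",    // kept: matches the regex
//		"foo":                        "bar", // dropped: matches nothing
//	})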

70 changes: 60 additions & 10 deletions internal/controllers/machine/machine_controller_noderef_test.go
@@ -19,6 +19,7 @@ package machine
import (
"context"
"fmt"
"regexp"
"testing"
"time"

@@ -700,8 +701,7 @@ func TestSummarizeNodeConditions(t *testing.T) {
}

func TestGetManagedLabels(t *testing.T) {
// Create managedLabels map from known managed prefixes.
managedLabels := map[string]string{
defaultLabels := map[string]string{
clusterv1.NodeRoleLabelPrefix + "/anyRole": "",

clusterv1.ManagedNodeLabelDomain: "",
@@ -715,22 +715,72 @@
"custom-prefix." + clusterv1.NodeRestrictionLabelDomain + "/anything": "",
}

// Append arbitrary labels.
allLabels := map[string]string{
"foo": "",
"bar": "",
additionalLabels := map[string]string{
"foo": "bar",
"bar": "baz",
"company.xyz/node.cluster.x-k8s.io": "not-managed",
"gpu-node.cluster.x-k8s.io": "not-managed",
"company.xyz/node-restriction.kubernetes.io": "not-managed",
"gpu-node-restriction.kubernetes.io": "not-managed",
"wrong.test.foo.com": "",
}
for k, v := range managedLabels {

exampleRegex := regexp.MustCompile(`foo`)
defaultAndRegexLabels := map[string]string{}
for k, v := range defaultLabels {
defaultAndRegexLabels[k] = v
}
defaultAndRegexLabels["foo"] = "bar"
defaultAndRegexLabels["wrong.test.foo.com"] = ""

allLabels := map[string]string{}
for k, v := range defaultLabels {
allLabels[k] = v
}
for k, v := range additionalLabels {
allLabels[k] = v
}

g := NewWithT(t)
got := getManagedLabels(allLabels)
g.Expect(got).To(BeEquivalentTo(managedLabels))
tests := []struct {
name string
syncMachineLabels []*regexp.Regexp
allLabels map[string]string
managedLabels map[string]string
}{
{
name: "always sync default labels",
syncMachineLabels: nil,
allLabels: allLabels,
managedLabels: defaultLabels,
},
{
name: "sync additional defined labels",
syncMachineLabels: []*regexp.Regexp{
exampleRegex,
},
allLabels: allLabels,
managedLabels: defaultAndRegexLabels,
},
{
name: "sync all labels",
syncMachineLabels: []*regexp.Regexp{
regexp.MustCompile(`.*`),
},
allLabels: allLabels,
managedLabels: allLabels,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)
r := &Reconciler{
SyncMachineLabels: tt.syncMachineLabels,
}
got := r.getManagedLabels(tt.allLabels)
g.Expect(got).To(BeEquivalentTo(tt.managedLabels))
})
}
}

func TestPatchNode(t *testing.T) {
1 change: 1 addition & 0 deletions internal/controllers/machine/suite_test.go
@@ -94,6 +94,7 @@ func TestMain(m *testing.M) {
APIReader: mgr.GetAPIReader(),
ClusterCache: clusterCache,
RemoteConditionsGracePeriod: 5 * time.Minute,
SyncMachineLabels: nil,
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: 1}); err != nil {
panic(fmt.Sprintf("Failed to start MachineReconciler: %v", err))
}
22 changes: 22 additions & 0 deletions main.go
@@ -22,6 +22,7 @@ import (
"flag"
"fmt"
"os"
"regexp"
goruntime "runtime"
"time"

@@ -35,6 +36,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/selection"
kerrors "k8s.io/apimachinery/pkg/util/errors"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/leaderelection/resourcelock"
cliflag "k8s.io/component-base/cli/flag"
@@ -125,6 +127,7 @@
machinePoolConcurrency int
clusterResourceSetConcurrency int
machineHealthCheckConcurrency int
syncMachineLabels []string

)

func init() {
@@ -251,6 +254,9 @@ func InitFlags(fs *pflag.FlagSet) {
fs.StringVar(&healthAddr, "health-addr", ":9440",
"The address the health endpoint binds to.")

fs.StringArrayVar(&syncMachineLabels, "additional-sync-machine-labels", []string{},
"List of regexes to select the additional set of labels to sync from the Machine to the Node. A label will be synced as long as it matches at least one of the regexes.")

flags.AddManagerOptions(fs, &managerOptions)

feature.MutableGates.AddFlag(fs)
@@ -559,12 +565,28 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager, watchNamespaces map
setupLog.Error(err, "Unable to create controller", "controller", "Cluster")
os.Exit(1)
}

var errs []error
var syncMachineLabelRegexes []*regexp.Regexp
for _, re := range syncMachineLabels {
reg, err := regexp.Compile(re)
if err != nil {
errs = append(errs, err)
} else {
syncMachineLabelRegexes = append(syncMachineLabelRegexes, reg)
}
}
if len(errs) > 0 {
setupLog.Error(fmt.Errorf("at least one of --additional-sync-machine-labels regexes is invalid: %w", kerrors.NewAggregate(errs)), "Unable to start manager")
os.Exit(1)
}
if err := (&controllers.MachineReconciler{
Client: mgr.GetClient(),
APIReader: mgr.GetAPIReader(),
ClusterCache: clusterCache,
WatchFilterValue: watchFilterValue,
RemoteConditionsGracePeriod: remoteConditionsGracePeriod,
SyncMachineLabels: syncMachineLabelRegexes,
}).SetupWithManager(ctx, mgr, concurrency(machineConcurrency)); err != nil {
setupLog.Error(err, "Unable to create controller", "controller", "Machine")
os.Exit(1)
