Skip to content

Commit

Permalink
Stricter kubeadm validation (config and runtime checks) (#11710)
Browse files Browse the repository at this point in the history
* kubeadm: do not ignore preflight errors blindly

The "ignoring all errors" seems to date back to the inception of the
kubeadm support (it was --skip-preflight-check before).

This can mask real errors and prevent users from seeing them.

Do not ignore any errors by default and make the set of ignored errors
configurable.

* download/kubeadm: remove redundant task

The mode is already set by the previous `copy` task.

* Validate kubeadm configs

This should help to fail early when we have invalid kubeadm configs (from
a kubespray bug or a misconfiguration).

* kubeadm-upgrade: remove unnecessary bool cast

* Convert kubeadm join discovery timeout to v1beta4 config

* CI: Ignore kubeadm:Mem errors on some setup.
  • Loading branch information
VannTen authored Nov 15, 2024
1 parent 05e2b47 commit 68718dc
Show file tree
Hide file tree
Showing 11 changed files with 49 additions and 53 deletions.
21 changes: 8 additions & 13 deletions roles/download/tasks/prep_kubeadm_images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,21 @@
- not skip_downloads | default(false)
- downloads.kubeadm.enabled

- name: Prep_kubeadm_images | Create kubeadm config
template:
src: "kubeadm-images.yaml.j2"
dest: "{{ kube_config_dir }}/kubeadm-images.yaml"
mode: "0644"
when:
- not skip_kubeadm_images | default(false)

- name: Prep_kubeadm_images | Copy kubeadm binary from download dir to system path
copy:
src: "{{ downloads.kubeadm.dest }}"
dest: "{{ bin_dir }}/kubeadm"
mode: "0755"
remote_src: true

- name: Prep_kubeadm_images | Set kubeadm binary permissions
file:
path: "{{ bin_dir }}/kubeadm"
mode: "0755"
state: file
- name: Prep_kubeadm_images | Create kubeadm config
template:
src: "kubeadm-images.yaml.j2"
dest: "{{ kube_config_dir }}/kubeadm-images.yaml"
mode: "0644"
validate: "{{ bin_dir }}/kubeadm config validate --config %s"
when:
- not skip_kubeadm_images | default(false)

- name: Prep_kubeadm_images | Generate list of required images
shell: "set -o pipefail && {{ bin_dir }}/kubeadm config images list --config={{ kube_config_dir }}/kubeadm-images.yaml | grep -Ev 'coredns|pause'"
Expand Down
3 changes: 2 additions & 1 deletion roles/kubernetes/control-plane/tasks/kubeadm-secondary.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
dest: "{{ kube_config_dir }}/kubeadm-controlplane.yaml"
mode: "0640"
backup: true
validate: "{{ bin_dir }}/kubeadm config validate --config %s"
when:
- inventory_hostname != first_kube_control_plane
- not kubeadm_already_run.stat.exists
Expand Down Expand Up @@ -87,7 +88,7 @@
command: >-
{{ bin_dir }}/kubeadm join
--config {{ kube_config_dir }}/kubeadm-controlplane.yaml
--ignore-preflight-errors=all
--ignore-preflight-errors={{ kubeadm_ignore_preflight_errors | join(',') }}
--skip-phases={{ kubeadm_join_phases_skip | join(',') }}
environment:
PATH: "{{ bin_dir }}:{{ ansible_env.PATH }}"
Expand Down
3 changes: 2 additions & 1 deletion roles/kubernetes/control-plane/tasks/kubeadm-setup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
src: "kubeadm-config.{{ kubeadm_config_api_version }}.yaml.j2"
dest: "{{ kube_config_dir }}/kubeadm-config.yaml"
mode: "0640"
validate: "{{ bin_dir }}/kubeadm config validate --config %s"

- name: Kubeadm | Create directory to store admission control configurations
file:
Expand Down Expand Up @@ -168,7 +169,7 @@
timeout -k {{ kubeadm_init_timeout }} {{ kubeadm_init_timeout }}
{{ bin_dir }}/kubeadm init
--config={{ kube_config_dir }}/kubeadm-config.yaml
--ignore-preflight-errors=all
--ignore-preflight-errors={{ kubeadm_ignore_preflight_errors | join(',') }}
--skip-phases={{ kubeadm_init_phases_skip | join(',') }}
{{ kube_external_ca_mode | ternary('', '--upload-certs') }}
register: kubeadm_init
Expand Down
8 changes: 4 additions & 4 deletions roles/kubernetes/control-plane/tasks/kubeadm-upgrade.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
{{ bin_dir }}/kubeadm
upgrade apply -y {{ kube_version }}
--certificate-renewal={{ kubeadm_upgrade_auto_cert_renewal }}
--ignore-preflight-errors=all
--ignore-preflight-errors={{ kubeadm_ignore_preflight_errors | join(',') }}
--allow-experimental-upgrades
--etcd-upgrade={{ (etcd_deployment_type == "kubeadm") | bool | lower }}
--etcd-upgrade={{ (etcd_deployment_type == "kubeadm") | lower }}
{% if kubeadm_patches | length > 0 %}--patches={{ kubeadm_patches_dir }}{% endif %}
--force
register: kubeadm_upgrade
Expand All @@ -36,9 +36,9 @@
{{ bin_dir }}/kubeadm
upgrade apply -y {{ kube_version }}
--certificate-renewal={{ kubeadm_upgrade_auto_cert_renewal }}
--ignore-preflight-errors=all
--ignore-preflight-errors={{ kubeadm_ignore_preflight_errors | join(',') }}
--allow-experimental-upgrades
--etcd-upgrade={{ (etcd_deployment_type == "kubeadm") | bool | lower }}
--etcd-upgrade={{ (etcd_deployment_type == "kubeadm") | lower }}
{% if kubeadm_patches | length > 0 %}--patches={{ kubeadm_patches_dir }}{% endif %}
--force
register: kubeadm_upgrade
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,14 @@ discovery:
token: {{ kubeadm_token }}
unsafeSkipCAVerification: true
{% endif %}
timeout: {{ discovery_timeout }}
tlsBootstrapToken: {{ kubeadm_token }}
{# TODO: drop the if when we drop support for k8s<1.31 #}
{% if kubeadm_config_api_version == 'v1beta3' %}
timeout: {{ discovery_timeout }}
{% else %}
timeouts:
discovery: {{ discovery_timeout }}
{% endif %}
controlPlane:
localAPIEndpoint:
advertiseAddress: {{ kube_apiserver_address }}
Expand Down
1 change: 1 addition & 0 deletions roles/kubernetes/kubeadm/tasks/kubeadm_etcd_node.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
src: "kubeadm-client.conf.j2"
dest: "{{ kube_config_dir }}/kubeadm-cert-controlplane.conf"
mode: "0640"
validate: "{{ bin_dir }}/kubeadm config validate --config %s"
vars:
kubeadm_cert_controlplane: true

Expand Down
43 changes: 11 additions & 32 deletions roles/kubernetes/kubeadm/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
dest: "{{ kube_config_dir }}/kubeadm-client.conf"
backup: true
mode: "0640"
validate: "{{ bin_dir }}/kubeadm config validate --config %s"
when: ('kube_control_plane' not in group_names)

- name: Join to cluster if needed
Expand All @@ -85,38 +86,16 @@
when:
- ('kube_control_plane' not in group_names)
- not kubelet_conf.stat.exists
block:

- name: Join to cluster
command: >-
timeout -k {{ kubeadm_join_timeout }} {{ kubeadm_join_timeout }}
{{ bin_dir }}/kubeadm join
--config {{ kube_config_dir }}/kubeadm-client.conf
--ignore-preflight-errors=DirAvailable--etc-kubernetes-manifests
--skip-phases={{ kubeadm_join_phases_skip | join(',') }}
register: kubeadm_join
changed_when: kubeadm_join is success

rescue:

- name: Join to cluster with ignores
command: >-
timeout -k {{ kubeadm_join_timeout }} {{ kubeadm_join_timeout }}
{{ bin_dir }}/kubeadm join
--config {{ kube_config_dir }}/kubeadm-client.conf
--ignore-preflight-errors=all
--skip-phases={{ kubeadm_join_phases_skip | join(',') }}
register: kubeadm_join
changed_when: kubeadm_join is success

always:

- name: Display kubeadm join stderr if any
when: kubeadm_join is failed
debug:
msg: |
Joined with warnings
{{ kubeadm_join.stderr_lines }}
vars:
ignored:
- DirAvailable--etc-kubernetes-manifests
- "{{ kubeadm_ignore_preflight_errors }}"
command: >-
timeout -k {{ kubeadm_join_timeout }} {{ kubeadm_join_timeout }}
{{ bin_dir }}/kubeadm join
--config {{ kube_config_dir }}/kubeadm-client.conf
--ignore-preflight-errors={{ ignored | flatten | join(',') }}
--skip-phases={{ kubeadm_join_phases_skip | join(',') }}
- name: Update server field in kubelet kubeconfig
lineinfile:
Expand Down
8 changes: 7 additions & 1 deletion roles/kubernetes/kubeadm/templates/kubeadm-client.conf.j2
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,14 @@ discovery:
unsafeSkipCAVerification: true
{% endif %}
{% endif %}
timeout: {{ discovery_timeout }}
tlsBootstrapToken: {{ kubeadm_token }}
{# TODO: drop the if when we drop support for k8s<1.31 #}
{% if kubeadm_config_api_version == 'v1beta3' %}
timeout: {{ discovery_timeout }}
{% else %}
timeouts:
discovery: {{ discovery_timeout }}
{% endif %}
caCertPath: {{ kube_cert_dir }}/ca.crt
{% if kubeadm_cert_controlplane is defined and kubeadm_cert_controlplane %}
controlPlane:
Expand Down
3 changes: 3 additions & 0 deletions roles/kubernetes/kubeadm_common/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@ kubeadm_patches: []
# example.com/prod_level: "{{ prod_level }}"
# - ...
# Patches are applied in the order they are specified.

# List of errors to ignore during kubeadm preflight checks
kubeadm_ignore_preflight_errors: []
2 changes: 2 additions & 0 deletions tests/files/packet_ubuntu22-calico-all-in-one.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ vm_memory: 1600

# Kubespray settings
auto_renew_certificates: true
kubeadm_ignore_preflight_errors:
- Mem

# Currently ipvs not available on KVM: https://packages.ubuntu.com/search?suite=focal&arch=amd64&mode=exactfilename&searchon=contents&keywords=ip_vs_sh.ko
kube_proxy_mode: iptables
Expand Down
2 changes: 2 additions & 0 deletions tests/files/packet_ubuntu24-calico-etcd-datastore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ vm_memory: 1600

# Kubespray settings
auto_renew_certificates: true
kubeadm_ignore_preflight_errors:
- Mem

# Currently ipvs not available on KVM: https://packages.ubuntu.com/search?suite=noble&arch=amd64&mode=exactfilename&searchon=contents&keywords=ip_vs_sh.ko
kube_proxy_mode: iptables
Expand Down

0 comments on commit 68718dc

Please sign in to comment.