Skip to content

Commit

Permalink
Nova services FFU during adoption (no extra cell)
Browse files Browse the repository at this point in the history
Update EDPM adoption docs and tests to execute Nova compute post-FFU.

Signed-off-by: Bohdan Dobrelia <[email protected]>
  • Loading branch information
bogdando committed Nov 6, 2023
1 parent aae3a32 commit 0020ebb
Show file tree
Hide file tree
Showing 3 changed files with 374 additions and 1 deletion.
182 changes: 181 additions & 1 deletion docs/openstack/edpm_adoption.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@

## Variables

(There are no shell variables necessary currently.)
Define the shell variables used in the Fast-forward upgrade steps below.
The values are just illustrative; use values that are correct for your environment:

```bash
PODIFIED_DB_ROOT_PASSWORD=$(oc get -o json secret/osp-secret | jq -r .data.DbRootPassword | base64 -d)
```

## Pre-checks

Expand Down Expand Up @@ -263,3 +268,178 @@ EOF
```
oc wait --for condition=Ready osdpns/openstack --timeout=30m
```
## Nova compute services fast-forward upgrade from Wallaby to Antelope
A rolling upgrade of Nova compute services cannot be done during adoption
in lock-step with the Nova control plane services, because the two are
managed independently: by EDPM ansible and by Kubernetes operators.
Nova service operator and OpenStack Dataplane operator ensure upgrading
is done independently of each other, by configuring
`[upgrade_levels]compute=auto` for Nova services. Nova control plane
services apply the change right after the CR is patched. Nova compute EDPM
services pick up the same configuration change later on, through the
ansible deployment.
> **NOTE**: The additional orchestration around the FFU workarounds
> configuration for the Nova compute EDPM service is subject to future changes.
* Configure pre-FFU workarounds for Nova compute EDPM services to update its version records:
```bash
oc apply -f - <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
name: nova-compute-workarounds
namespace: openstack
data:
19-nova-compute-cell1-workarounds.conf: |
[workarounds]
disable_compute_service_check_for_ffu=true
EOF
oc apply -f - <<EOF
apiVersion: dataplane.openstack.org/v1beta1
kind: OpenStackDataPlaneService
metadata:
name: nova-compute-workarounds
namespace: openstack
spec:
label: nova.compute.workarounds
configMaps:
- nova-compute-workarounds
playbook: osp.edpm.nova
---
apiVersion: dataplane.openstack.org/v1beta1
kind: OpenStackDataPlaneDeployment
metadata:
name: openstack-nova-compute-workarounds
namespace: openstack
spec:
nodeSets:
- openstack
servicesOverride:
- nova-compute-workarounds
EOF
```
* Wait for the version records of the cell1 Nova compute EDPM services to be updated (it may take some time):
```bash
oc exec -it mariadb-openstack-cell1 -- mysql --user=root --password=${PODIFIED_DB_ROOT_PASSWORD} \
-e "select a.version from nova_cell1.services a join nova_cell1.services b where a.version!=b.version and a.binary='nova-compute';"
```
The update is complete when the above query returns an empty result.
* Remove pre-FFU workarounds for Nova control plane services:
```bash
oc patch openstackcontrolplane openstack -n openstack --type=merge --patch '
spec:
nova:
template:
cellTemplates:
cell0:
conductorServiceTemplate:
customServiceConfig: |
[workarounds]
disable_compute_service_check_for_ffu=false
cell1:
metadataServiceTemplate:
customServiceConfig: |
[workarounds]
disable_compute_service_check_for_ffu=false
conductorServiceTemplate:
customServiceConfig: |
[workarounds]
disable_compute_service_check_for_ffu=false
apiServiceTemplate:
customServiceConfig: |
[workarounds]
disable_compute_service_check_for_ffu=false
metadataServiceTemplate:
customServiceConfig: |
[workarounds]
disable_compute_service_check_for_ffu=false
schedulerServiceTemplate:
customServiceConfig: |
[workarounds]
disable_compute_service_check_for_ffu=false
'
```
* Wait for Nova control plane services' CRs to become ready:
```bash
oc get novaapis --field-selector metadata.name=nova-api -o jsonpath='{.items[0].status.conditions}' \
| jq -e '.[]|select(.type=="Ready" and .status=="True")'
oc get novacells --field-selector metadata.name=nova-cell0 -o jsonpath='{.items[0].status.conditions}' \
| jq -e '.[]|select(.type=="Ready" and .status=="True")'
oc get novacells --field-selector metadata.name=nova-cell1 -o jsonpath='{.items[0].status.conditions}' \
| jq -e '.[]|select(.type=="Ready" and .status=="True")'
oc get novaconductors --field-selector metadata.name=nova-cell0-conductor -o jsonpath='{.items[0].status.conditions}' \
| jq -e '.[]|select(.type=="Ready" and .status=="True")'
oc get novaconductors --field-selector metadata.name=nova-cell1-conductor -o jsonpath='{.items[0].status.conditions}' \
| jq -e '.[]|select(.type=="Ready" and .status=="True")'
oc get novametadata --field-selector metadata.name=nova-metadata -o jsonpath='{.items[0].status.conditions}' \
| jq -e '.[]|select(.type=="Ready" and .status=="True")'
oc get novanovncproxies --field-selector metadata.name=nova-cell1-novncproxy -o jsonpath='{.items[0].status.conditions}' \
| jq -e '.[]|select(.type=="Ready" and .status=="True")'
oc get novaschedulers --field-selector metadata.name=nova-scheduler -o jsonpath='{.items[0].status.conditions}' \
| jq -e '.[]|select(.type=="Ready" and .status=="True")'
```
* Remove pre-FFU workarounds for Nova compute EDPM services:
```bash
oc apply -f - <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
name: nova-compute-ffu
namespace: openstack
data:
20-nova-compute-cell1-ffu-cleanup.conf: |
[workarounds]
disable_compute_service_check_for_ffu=false
EOF
oc apply -f - <<EOF
apiVersion: dataplane.openstack.org/v1beta1
kind: OpenStackDataPlaneService
metadata:
name: nova-compute-ffu
namespace: openstack
spec:
label: nova.compute.ffu
configMaps:
- nova-compute-ffu
playbook: osp.edpm.nova
---
apiVersion: dataplane.openstack.org/v1beta1
kind: OpenStackDataPlaneDeployment
metadata:
name: openstack-nova-compute-ffu
namespace: openstack
spec:
nodeSets:
- openstack
servicesOverride:
- nova-compute-ffu
EOF
```
* Wait for Nova compute EDPM service to become ready:
```bash
oc wait --for condition=Ready osdpd/openstack-nova-compute-ffu --timeout=5m
```
* Run Nova DB online migrations to complete FFU:
```bash
oc exec -it nova-cell0-conductor-0 -- nova-manage db online_data_migrations
oc exec -it nova-cell1-conductor-0 -- nova-manage db online_data_migrations
```
4 changes: 4 additions & 0 deletions tests/roles/dataplane_adoption/tasks/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -265,3 +265,7 @@
oc wait --for condition=Ready osdpns/openstack --timeout=40m
# TODO: work on network configuration for making possible to run this task on other IP ranges
when: "edpm_node_ip.startswith('192.168.122')"

# Pull in the Nova fast-forward-upgrade tasks kept in nova_ffu.yaml.
# NOTE(review): the task just above carries a `when` on edpm_node_ip; confirm
# this include is really meant to run unconditionally.
- name: Complete Nova services Wallaby->Antelope FFU
  ansible.builtin.include_tasks: nova_ffu.yaml
189 changes: 189 additions & 0 deletions tests/roles/dataplane_adoption/tasks/nova_ffu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# Build a shell snippet that assigns the podified MariaDB root password to
# PODIFIED_DB_ROOT_PASSWORD; a later task splices it into its script through
# `{{ mariadb_copy_shell_vars }}`. Logging is controlled by `use_no_log`.
- name: set podified MariaDB copy shell vars
  ansible.builtin.set_fact:
    mariadb_copy_shell_vars: |
      PODIFIED_DB_ROOT_PASSWORD="{{ podified_db_root_password }}"
  no_log: "{{ use_no_log }}"
# Applies three objects in the `openstack` namespace with two `oc apply` calls:
#   1. a ConfigMap `nova-compute-workarounds` holding a [workarounds] snippet
#      that sets disable_compute_service_check_for_ffu=true;
#   2. an OpenStackDataPlaneService of the same name that references that
#      ConfigMap and the osp.edpm.nova playbook;
#   3. an OpenStackDataPlaneDeployment that lists the `openstack` node set and
#      only this service under servicesOverride.
# NOTE(review): the nesting of the embedded manifests must follow the
# ConfigMap / dataplane CRD schemas — confirm indentation when editing.
- name: configure pre-FFU workarounds for Nova compute EDPM services to update its version records
ansible.builtin.shell: |
{{ shell_header }}
{{ oc_header }}
oc apply -f - <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
name: nova-compute-workarounds
namespace: openstack
data:
19-nova-compute-cell1-workarounds.conf: |
[workarounds]
disable_compute_service_check_for_ffu=true
EOF
oc apply -f - <<EOF
apiVersion: dataplane.openstack.org/v1beta1
kind: OpenStackDataPlaneService
metadata:
name: nova-compute-workarounds
namespace: openstack
spec:
label: nova.compute.workarounds
configMaps:
- nova-compute-workarounds
playbook: osp.edpm.nova
---
apiVersion: dataplane.openstack.org/v1beta1
kind: OpenStackDataPlaneDeployment
metadata:
name: openstack-nova-compute-workarounds
namespace: openstack
spec:
nodeSets:
- openstack
servicesOverride:
- nova-compute-workarounds
EOF
# Poll the cell1 database until no nova-compute record in nova_cell1.services
# reports a version that differs from another record in the same table.
# The query prints nothing once the versions agree, so the task succeeds when
# mysql exits 0 with empty stdout (up to 20 retries, 6 seconds apart).
# The password expansion is double-quoted so that a value containing spaces or
# glob characters is passed to mysql as a single, unmodified argument.
- name: wait for cell1 Nova compute EDPM services version updated
  ansible.builtin.shell: |
    {{ shell_header }}
    {{ oc_header }}
    {{ mariadb_copy_shell_vars }}
    oc exec -it mariadb-openstack-cell1 -- mysql --user=root --password="${PODIFIED_DB_ROOT_PASSWORD}" \
      -e "select a.version from nova_cell1.services a join nova_cell1.services b where a.version!=b.version and a.binary='nova-compute';"
  register: records_check_results
  until: records_check_results.rc == 0 and records_check_results.stdout_lines | length == 0
  retries: 20
  delay: 6

# Merge-patches the `openstack` OpenStackControlPlane CR so that the
# customServiceConfig of the Nova conductor, metadata, API and scheduler
# service templates carries disable_compute_service_check_for_ffu=false.
# NOTE(review): `metadataServiceTemplate` appears twice in the patch —
# presumably once under cell1 and once at the Nova template level; confirm the
# nesting against the OpenStackControlPlane CRD when editing.
- name: remove pre-FFU workarounds for Nova control plane services
ansible.builtin.shell: |
{{ shell_header }}
{{ oc_header }}
oc patch openstackcontrolplane openstack -n openstack --type=merge --patch '
spec:
nova:
template:
cellTemplates:
cell0:
conductorServiceTemplate:
customServiceConfig: |
[workarounds]
disable_compute_service_check_for_ffu=false
cell1:
metadataServiceTemplate:
customServiceConfig: |
[workarounds]
disable_compute_service_check_for_ffu=false
conductorServiceTemplate:
customServiceConfig: |
[workarounds]
disable_compute_service_check_for_ffu=false
apiServiceTemplate:
customServiceConfig: |
[workarounds]
disable_compute_service_check_for_ffu=false
metadataServiceTemplate:
customServiceConfig: |
[workarounds]
disable_compute_service_check_for_ffu=false
schedulerServiceTemplate:
customServiceConfig: |
[workarounds]
disable_compute_service_check_for_ffu=false
'
# NOTE(bogdando): Status phase 'Running' doesn't necessarily mean it IS running in fact.
# Instead, check for CRs status, then attempt exec'ing on the conductors pods to run the
# Nova DB online data migrations, with retries as guardrails for real running statuses of pods
#
# For every loop item, fetch the status conditions of the object `name` of CR
# kind `cr` and let `jq -e` fail unless a condition of type "Ready" with
# status "True" is present. Each item is retried up to 30 times, 5 seconds apart.
# Loop items carry only the keys the command reads (`cr`, `name`).
- name: wait for Nova control plane services' CRs to become ready
  ansible.builtin.shell: |
    {{ shell_header }}
    {{ oc_header }}
    oc get {{ service.cr }} --field-selector metadata.name={{ service.name }} -o jsonpath='{.items[0].status.conditions}' \
      | jq -e '.[]|select(.type=="Ready" and .status=="True")'
  register: nova_crs_ready_result
  until: nova_crs_ready_result is success
  retries: 30
  delay: 5
  loop_control:
    loop_var: service
  loop:
    - cr: novaapis
      name: nova-api
    - cr: novacells
      name: nova-cell0
    - cr: novacells
      name: nova-cell1
    - cr: novaconductors
      name: nova-cell0-conductor
    - cr: novaconductors
      name: nova-cell1-conductor
    - cr: novametadata
      name: nova-metadata
    - cr: novanovncproxies
      name: nova-cell1-novncproxy
    - cr: novaschedulers
      name: nova-scheduler

# Applies three objects in the `openstack` namespace with two `oc apply` calls:
#   1. a ConfigMap `nova-compute-ffu` holding a [workarounds] snippet that sets
#      disable_compute_service_check_for_ffu=false;
#   2. an OpenStackDataPlaneService of the same name that references that
#      ConfigMap and the osp.edpm.nova playbook;
#   3. an OpenStackDataPlaneDeployment `openstack-nova-compute-ffu` that lists
#      the `openstack` node set and only this service under servicesOverride.
# NOTE(review): the nesting of the embedded manifests must follow the
# ConfigMap / dataplane CRD schemas — confirm indentation when editing.
- name: remove pre-FFU workarounds for Nova compute EDPM services
ansible.builtin.shell: |
{{ shell_header }}
{{ oc_header }}
oc apply -f - <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
name: nova-compute-ffu
namespace: openstack
data:
20-nova-compute-cell1-ffu-cleanup.conf: |
[workarounds]
disable_compute_service_check_for_ffu=false
EOF
oc apply -f - <<EOF
apiVersion: dataplane.openstack.org/v1beta1
kind: OpenStackDataPlaneService
metadata:
name: nova-compute-ffu
namespace: openstack
spec:
label: nova.compute.ffu
configMaps:
- nova-compute-ffu
playbook: osp.edpm.nova
---
apiVersion: dataplane.openstack.org/v1beta1
kind: OpenStackDataPlaneDeployment
metadata:
name: openstack-nova-compute-ffu
namespace: openstack
spec:
nodeSets:
- openstack
servicesOverride:
- nova-compute-ffu
EOF
# Block on the Ready condition of the openstack-nova-compute-ffu dataplane
# deployment. A single `oc wait` gives up after 5 minutes; the task re-runs it
# up to 10 times with a 6 second pause until it succeeds.
- name: wait for Nova compute EDPM services to become ready
  ansible.builtin.shell:
    cmd: |
      {{ shell_header }}
      {{ oc_header }}
      oc wait --for condition=Ready osdpd/openstack-nova-compute-ffu --timeout=5m
  register: nova_ffu_edpm_result
  retries: 10
  delay: 6
  until: nova_ffu_edpm_result is success

# Execute `nova-manage db online_data_migrations` inside the cell0 and cell1
# conductor pods. The whole script is retried (up to 10 times, 6 seconds
# apart) until it succeeds.
- name: run Nova DB migrations to complete Wallaby->antelope FFU
  ansible.builtin.shell:
    cmd: |
      {{ shell_header }}
      {{ oc_header }}
      oc exec -it nova-cell0-conductor-0 -- nova-manage db online_data_migrations
      oc exec -it nova-cell1-conductor-0 -- nova-manage db online_data_migrations
  register: nova_exec_result
  retries: 10
  delay: 6
  until: nova_exec_result is success

0 comments on commit 0020ebb

Please sign in to comment.