Coverage for watcher/decision_engine/strategy/strategies/host_maintenance.py: 88%
102 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-17 12:22 +0000
1# -*- encoding: utf-8 -*-
2# Copyright (c) 2017 chinac.com
3#
4# Authors: suzhengwei<suzhengwei@chinac.com>
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
15# implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
18#
20from oslo_log import log
21from watcher._i18n import _
22from watcher.common import exception
23from watcher.decision_engine.model import element
24from watcher.decision_engine.strategy.strategies import base
26LOG = log.getLogger(__name__)
class HostMaintenance(base.HostMaintenanceBaseStrategy):
    """[PoC]Host Maintenance

    *Description*

    A migration strategy that empties one compute node so that it can be
    maintained without interrupting the users' applications.
    When a backup node is given and it has enough free vCPU and memory,
    all instances are migrated from the maintenance node to that backup
    node. Otherwise (no backup node, or the backup node does not fit) the
    instances are migrated without an explicit destination, relying on
    nova-scheduler to place them.

    *Requirements*

    * You must have at least 2 physical compute nodes to run this strategy.

    *Limitations*

    - This is a proof of concept that is not meant to be used in production
    - It migrates all instances from one host to other hosts. It's better to
      execute such strategy when load is not heavy, and use this algorithm
      with `ONESHOT` audit.
    - It assumes that cold and live migrations are possible.
    """

    # Action type names passed to ``self.solution.add_action``.
    INSTANCE_MIGRATION = "migrate"
    CHANGE_NOVA_SERVICE_STATE = "change_nova_service_state"

    def __init__(self, config, osc=None):
        """Initialize the strategy.

        Both arguments are forwarded unchanged to the base strategy.

        :param config: strategy configuration
        :param osc: optional client object; defaults to None
        """
        super().__init__(config, osc)

    @classmethod
    def get_name(cls):
        """Return the identifier of this strategy."""
        return "host_maintenance"

    @classmethod
    def get_display_name(cls):
        """Return the translated, human readable strategy name."""
        return _("Host Maintenance Strategy")

    @classmethod
    def get_translatable_display_name(cls):
        """Return the untranslated, human readable strategy name."""
        return "Host Maintenance Strategy"

    @classmethod
    def get_schema(cls):
        """Return the schema describing the strategy input parameters.

        ``maintenance_node`` is mandatory, ``backup_node`` is optional.
        """
        return {
            "properties": {
                "maintenance_node": {
                    "description": "The name of the compute node which "
                                   "need maintenance",
                    "type": "string",
                },
                "backup_node": {
                    "description": "The name of the compute node which "
                                   "will backup the maintenance node.",
                    "type": "string",
                },
            },
            "required": ["maintenance_node"],
        }

    def get_instance_state_str(self, instance):
        """Get instance state in string format

        :param instance: object whose ``state`` is either a plain string
                         or an ``element.InstanceState`` member
        :return: the state as a string
        :raises: exception.WatcherException if ``state`` has another type
        """
        state = instance.state
        if isinstance(state, str):
            return state
        if isinstance(state, element.InstanceState):
            return state.value

        LOG.error('Unexpected instance state type, '
                  'state=%(state)s, state_type=%(st)s.',
                  dict(state=state,
                       st=type(state)))
        raise exception.WatcherException()

    def get_node_status_str(self, node):
        """Get node status in string format

        :param node: object whose ``status`` is either a plain string
                     or an ``element.ServiceState`` member
        :return: the status as a string
        :raises: exception.WatcherException if ``status`` has another type
        """
        status = node.status
        if isinstance(status, str):
            return status
        if isinstance(status, element.ServiceState):
            return status.value

        LOG.error('Unexpected node status type, '
                  'status=%(status)s, status_type=%(st)s.',
                  dict(status=status,
                       st=type(status)))
        raise exception.WatcherException()

    def get_node_capacity(self, node):
        """Collect cpu, ram and disk capacity of a node.

        The values are read from ``vcpu_capacity``, ``memory_mb_capacity``
        and ``disk_gb_capacity`` of the node, so going by the attribute
        names ram is in MB and disk is in GB.

        :param node: node object
        :return: dict(cpu(cores), ram(MB), disk(GB))
        """
        return {
            'cpu': node.vcpu_capacity,
            'ram': node.memory_mb_capacity,
            'disk': node.disk_gb_capacity,
        }

    def host_fits(self, source_node, destination_node):
        """check host fits

        Compare the resources used on source_node with the resources
        still free on destination_node. Only 'vcpu' and 'memory' are
        compared; disk is not taken into account.

        :param source_node: node object to be emptied
        :param destination_node: node object expected to receive the VMs
        :return: True if VMs could intensively migrate
                 from source_node to destination_node, False otherwise.
        """
        used = self.compute_model.get_node_used_resources(source_node)
        free = self.compute_model.get_node_free_resources(destination_node)
        return not any(used[metric] > free[metric]
                       for metric in ('vcpu', 'memory'))

    def _add_service_state_action(self, node, params):
        """Add a change_nova_service_state action for node to the solution.

        :param node: node object; its uuid is used as the resource id
        :param params: input parameters of the action
        """
        self.solution.add_action(
            action_type=self.CHANGE_NOVA_SERVICE_STATE,
            resource_id=node.uuid,
            input_parameters=params)

    def add_action_enable_compute_node(self, node):
        """Add an action for node enabler into the solution."""
        self._add_service_state_action(node, {
            'state': element.ServiceState.ENABLED.value,
            'resource_name': node.hostname,
        })

    def add_action_maintain_compute_node(self, node):
        """Add an action for node maintenance into the solution.

        The action disables the node and records
        ``self.REASON_FOR_MAINTAINING`` as the disabled reason.
        """
        self._add_service_state_action(node, {
            'state': element.ServiceState.DISABLED.value,
            'disabled_reason': self.REASON_FOR_MAINTAINING,
            'resource_name': node.hostname,
        })

    def enable_compute_node_if_disabled(self, node):
        """Add an enable action for node unless it is already enabled."""
        if self.get_node_status_str(node) == (
                element.ServiceState.ENABLED.value):
            return
        self.add_action_enable_compute_node(node)

    def instance_migration(self, instance, src_node, des_node=None):
        """Add an action for instance migration into the solution.

        Instances in ACTIVE state are live migrated, every other state
        results in a cold migration.

        :param instance: instance object
        :param src_node: node object
        :param des_node: node object. if None, the instance will be
                         migrated relying on nova-scheduler
        :return: None
        """
        is_active = (self.get_instance_state_str(instance) ==
                     element.InstanceState.ACTIVE.value)

        # NOTE(review): the source node is identified by uuid while the
        # destination node is identified by hostname; kept as is, confirm
        # against what the migrate action expects.
        params = {'migration_type': 'live' if is_active else 'cold',
                  'source_node': src_node.uuid,
                  'resource_name': instance.name}
        if des_node:
            params['destination_node'] = des_node.hostname
        self.solution.add_action(action_type=self.INSTANCE_MIGRATION,
                                 resource_id=instance.uuid,
                                 input_parameters=params)

    def host_migration(self, source_node, destination_node):
        """host migration

        Migrate all instances from source_node to destination_node.
        Active instances use "live-migrate",
        and other instances use "cold-migrate"

        :param source_node: node object whose instances are migrated
        :param destination_node: node object, or None to leave the
                                 placement to nova-scheduler
        """
        for instance in self.compute_model.get_node_instances(source_node):
            self.instance_migration(instance, source_node, destination_node)

    def safe_maintain(self, maintenance_node, backup_node=None):
        """safe maintain one compute node

        Migrate all instances of the maintenance_node intensively to the
        backup host.

        The resources used on the maintenance node are compared with the
        free resources of the backup node. If all instances fit, the
        backup node is enabled when needed, the maintenance node is
        disabled for maintenance and the migrations are added to the
        solution.

        :param maintenance_node: node object to be maintained
        :param backup_node: node object receiving the instances, or None
        :return: True if the actions were added to the solution, False if
                 no backup node was given or it does not fit
        """
        if not backup_node:
            return False
        if not self.host_fits(maintenance_node, backup_node):
            return False

        # Order matters for the resulting solution: enable the target,
        # disable the source, then move the instances.
        self.enable_compute_node_if_disabled(backup_node)
        self.add_action_maintain_compute_node(maintenance_node)
        self.host_migration(maintenance_node, backup_node)
        return True

    def try_maintain(self, maintenance_node):
        """try to maintain one compute node

        It firstly adds the action disabling the maintenance_node for
        maintenance. Then it migrates all instances of the maintenance
        node without a destination, relying on nova-scheduler.

        :param maintenance_node: node object to be maintained
        """
        self.add_action_maintain_compute_node(maintenance_node)
        self.host_migration(maintenance_node, None)

    def pre_execute(self):
        """Pre-execution phase, delegated to ``self._pre_execute``."""
        self._pre_execute()

    def do_execute(self, audit=None):
        """Build the solution for the requested maintenance.

        Reads ``maintenance_node`` and ``backup_node`` from the input
        parameters. An empty maintenance node is only disabled (unless
        its disabled reason already is the maintenance reason). Otherwise
        a migration to the backup node is attempted first, falling back
        to scheduler driven migrations.

        :param audit: not used by this strategy
        """
        LOG.info(_('Executing Host Maintenance Migration Strategy'))

        maintenance_node = self.input_parameters.get('maintenance_node')
        backup_node = self.input_parameters.get('backup_node')

        # if no VMs in the maintenance_node, just maintain the compute node
        src_node = self.compute_model.get_node_by_name(maintenance_node)
        if len(self.compute_model.get_node_instances(src_node)) == 0:
            if (src_node.disabled_reason !=
                    self.REASON_FOR_MAINTAINING):
                self.add_action_maintain_compute_node(src_node)
            return

        des_node = None
        if backup_node:
            des_node = self.compute_model.get_node_by_name(backup_node)

        if not self.safe_maintain(src_node, des_node):
            self.try_maintain(src_node)

    def post_execute(self):
        """Post-execution phase

        This can be used to compute the global efficacy. Currently it
        only logs the solution actions and the compute model at debug
        level.
        """
        LOG.debug(self.solution.actions)
        LOG.debug(self.compute_model.to_string())