Coverage for watcher/decision_engine/strategy/strategies/host_maintenance.py

# -*- encoding: utf-8 -*-
#
# Authors: suzhengwei<suzhengwei@chinac.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from oslo_log import log

from watcher._i18n import _
from watcher.common import exception
from watcher.decision_engine.model import element
from watcher.decision_engine.strategy.strategies import base

# Module-level logger shared by every method of the strategy below.
LOG = log.getLogger(__name__)

class HostMaintenance(base.HostMaintenanceBaseStrategy):
    """[PoC]Host Maintenance

    *Description*

    It is a migration strategy for the maintenance of one compute node,
    without interrupting the user's applications.
    If a backup node is given and has enough free vcpu and memory, the
    strategy migrates all instances from the maintenance node to the
    backup node. If no backup node is provided (or the one provided does
    not fit), it migrates all instances relying on nova-scheduler.

    *Requirements*

    * You must have at least 2 physical compute nodes to run this strategy.

    *Limitations*

    - This is a proof of concept that is not meant to be used in production
    - It migrates all instances from one host to other hosts. It's better to
      execute such strategy when load is not heavy, and use this algorithm
      with `ONESHOT` audit.
    - It assumes that cold and live migrations are possible.
    """

    # Action type names passed to ``self.solution.add_action``.
    INSTANCE_MIGRATION = "migrate"
    CHANGE_NOVA_SERVICE_STATE = "change_nova_service_state"

    def __init__(self, config, osc=None):
        """Forward ``config`` and ``osc`` unchanged to the base strategy."""
        super(HostMaintenance, self).__init__(config, osc)

    @classmethod
    def get_name(cls):
        """Return the identifier of this strategy."""
        return "host_maintenance"

    @classmethod
    def get_display_name(cls):
        """Return the translated display name of this strategy."""
        return _("Host Maintenance Strategy")

    @classmethod
    def get_translatable_display_name(cls):
        """Return the untranslated display name of this strategy."""
        return "Host Maintenance Strategy"

    @classmethod
    def get_schema(cls):
        """Return the schema of the strategy input parameters.

        ``maintenance_node`` is required; ``backup_node`` is optional.
        Both are strings holding a compute node name.
        """
        return {
            "properties": {
                "maintenance_node": {
                    "description": "The name of the compute node which "
                                   "need maintenance",
                    "type": "string",
                },
                "backup_node": {
                    "description": "The name of the compute node which "
                                   "will backup the maintenance node.",
                    "type": "string",
                },
            },
            "required": ["maintenance_node"],
        }

    def get_instance_state_str(self, instance):
        """Get instance state in string format

        :param instance: object exposing a ``state`` attribute
        :return: ``instance.state`` itself when it is a ``str``, or its
                 ``value`` when it is an ``element.InstanceState``
        :raises: exception.WatcherException for any other state type
        """
        state = instance.state
        if isinstance(state, str):
            return state
        if isinstance(state, element.InstanceState):
            return state.value

        LOG.error('Unexpected instance state type, '
                  'state=%(state)s, state_type=%(st)s.',
                  dict(state=state,
                       st=type(state)))
        raise exception.WatcherException

    def get_node_status_str(self, node):
        """Get node status in string format

        :param node: object exposing a ``status`` attribute
        :return: ``node.status`` itself when it is a ``str``, or its
                 ``value`` when it is an ``element.ServiceState``
        :raises: exception.WatcherException for any other status type
        """
        status = node.status
        if isinstance(status, str):
            return status
        if isinstance(status, element.ServiceState):
            return status.value

        LOG.error('Unexpected node status type, '
                  'status=%(status)s, status_type=%(st)s.',
                  dict(status=status,
                       st=type(status)))
        raise exception.WatcherException

    def get_node_capacity(self, node):
        """Collect cpu, ram and disk capacity of a node.

        :param node: node object
        :return: dict(cpu(cores), ram(MB), disk(GB))
        """
        return dict(cpu=node.vcpu_capacity,
                    ram=node.memory_mb_capacity,
                    disk=node.disk_gb_capacity)

    def host_fits(self, source_node, destination_node):
        """check host fits

        Compare the resources used on source_node with the resources
        free on destination_node, as reported by the compute model.
        Only 'vcpu' and 'memory' are compared; disk is not checked.

        :param source_node: node object whose instances would move
        :param destination_node: node object that would receive them
        :return: True if all VMs of source_node could be migrated to
                 destination_node, False otherwise
        """
        source_node_used = self.compute_model.get_node_used_resources(
            source_node)
        destination_node_free = self.compute_model.get_node_free_resources(
            destination_node)
        metrics = ['vcpu', 'memory']
        # The host fits unless some metric needs more than what is free.
        return not any(source_node_used[m] > destination_node_free[m]
                       for m in metrics)

    def add_action_enable_compute_node(self, node):
        """Add an action for node enabler into the solution.

        :param node: node object; its ``uuid`` is the action resource id
                     and its ``hostname`` the resource name
        """
        params = {'state': element.ServiceState.ENABLED.value,
                  'resource_name': node.hostname}
        self.solution.add_action(
            action_type=self.CHANGE_NOVA_SERVICE_STATE,
            resource_id=node.uuid,
            input_parameters=params)

    def add_action_maintain_compute_node(self, node):
        """Add an action for node maintenance into the solution.

        The action disables the nova service of the node, using
        ``self.REASON_FOR_MAINTAINING`` as the disabled reason
        (presumably provided by the base strategy -- not defined in
        this class).

        :param node: node object; its ``uuid`` is the action resource id
                     and its ``hostname`` the resource name
        """
        params = {'state': element.ServiceState.DISABLED.value,
                  'disabled_reason': self.REASON_FOR_MAINTAINING,
                  'resource_name': node.hostname}
        self.solution.add_action(
            action_type=self.CHANGE_NOVA_SERVICE_STATE,
            resource_id=node.uuid,
            input_parameters=params)

    def enable_compute_node_if_disabled(self, node):
        """Add an enable action for the node unless it is already enabled.

        :param node: node object
        :raises: exception.WatcherException if the node status has an
                 unexpected type
        """
        node_status_str = self.get_node_status_str(node)
        if node_status_str != element.ServiceState.ENABLED.value:
            self.add_action_enable_compute_node(node)

    def instance_migration(self, instance, src_node, des_node=None):
        """Add an action for instance migration into the solution.

        Active instances are live migrated; instances in any other
        state are cold migrated.

        :param instance: instance object
        :param src_node: node object
        :param des_node: node object. if None, the instance will be
                         migrated relying on nova-scheduler
        :return: None
        :raises: exception.WatcherException if the instance state has an
                 unexpected type
        """
        instance_state_str = self.get_instance_state_str(instance)
        is_active = (
            instance_state_str == element.InstanceState.ACTIVE.value)
        migration_type = 'live' if is_active else 'cold'

        params = {'migration_type': migration_type,
                  'source_node': src_node.uuid,
                  'resource_name': instance.name}
        # Without an explicit destination the key is left out of the
        # action parameters altogether.
        if des_node:
            params['destination_node'] = des_node.hostname
        self.solution.add_action(action_type=self.INSTANCE_MIGRATION,
                                 resource_id=instance.uuid,
                                 input_parameters=params)

    def host_migration(self, source_node, destination_node):
        """host migration

        Migrate all instances from source_node to destination_node.
        Active instances use "live-migrate",
        and other instances use "cold-migrate"

        :param source_node: node object the instances are taken from
        :param destination_node: node object the instances are sent to
        """
        instances = self.compute_model.get_node_instances(source_node)
        for instance in instances:
            self.instance_migration(instance, source_node, destination_node)

    def safe_maintain(self, maintenance_node, backup_node=None):
        """safe maintain one compute node

        Migrate all instances of the maintenance_node to the backup
        node, provided that a backup node is given and that it passes
        the ``host_fits`` check against the maintenance node.

        When both conditions hold, the following actions are added to
        the solution, in this order: enable the backup node if it is
        not enabled, disable the maintenance node for maintenance, and
        migrate every instance to the backup node.

        :param maintenance_node: node object to put into maintenance
        :param backup_node: node object receiving the instances, or None
        :return: True if the actions were added to the solution, False
                 if there is no backup node or it does not fit
        """
        # If the user gives a backup node with required capacity, then
        # migrate all instances from the maintaining node to the backup
        # node.
        if backup_node and self.host_fits(maintenance_node, backup_node):
            self.enable_compute_node_if_disabled(backup_node)
            self.add_action_maintain_compute_node(maintenance_node)
            self.host_migration(maintenance_node, backup_node)
            return True

        return False

    def try_maintain(self, maintenance_node):
        """try to maintain one compute node

        It firstly adds the action disabling the maintenance_node for
        maintenance. Then it adds one migration per instance of the
        maintenance node without a destination, relying on
        nova-scheduler.

        :param maintenance_node: node object to put into maintenance
        """
        self.add_action_maintain_compute_node(maintenance_node)
        instances = self.compute_model.get_node_instances(maintenance_node)
        for instance in instances:
            self.instance_migration(instance, maintenance_node)

    def pre_execute(self):
        """Pre-execution phase, delegated to ``self._pre_execute()``."""
        self._pre_execute()

    def do_execute(self, audit=None):
        """Build the solution for the requested maintenance.

        Reads 'maintenance_node' and 'backup_node' from the input
        parameters and resolves them by name in the compute model.

        :param audit: not used by this strategy
        """
        LOG.info(_('Executing Host Maintenance Migration Strategy'))

        maintenance_node = self.input_parameters.get('maintenance_node')
        backup_node = self.input_parameters.get('backup_node')

        # if no VMs in the maintenance_node, just maintain the compute node
        src_node = self.compute_model.get_node_by_name(maintenance_node)
        if not self.compute_model.get_node_instances(src_node):
            # Skip the action when the node is already disabled for
            # this very reason.
            if (src_node.disabled_reason !=
                    self.REASON_FOR_MAINTAINING):
                self.add_action_maintain_compute_node(src_node)
            return

        if backup_node:
            des_node = self.compute_model.get_node_by_name(backup_node)
        else:
            des_node = None

        # Fall back on nova-scheduler when the backup node is missing
        # or cannot host the instances.
        if not self.safe_maintain(src_node, des_node):
            self.try_maintain(src_node)

    def post_execute(self):
        """Post-execution phase

        This can be used to compute the global efficacy. For now it
        only logs the solution actions and the compute model at debug
        level.
        """
        LOG.debug(self.solution.actions)
        LOG.debug(self.compute_model.to_string())

Coverage for watcher/decision_engine/strategy/strategies/host_maintenance.py: 88%

102 statements