Coverage for watcher/decision_engine/strategy/strategies/noisy_neighbor.py: 83%

125 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-17 12:22 +0000

1# -*- encoding: utf-8 -*- 

2# Copyright (c) 2017 Intel Corp 

3# 

4# Authors: Prudhvi Rao Shedimbi <prudhvi.rao.shedimbi@intel.com> 

5# 

6# Licensed under the Apache License, Version 2.0 (the "License"); 

7# you may not use this file except in compliance with the License. 

8# You may obtain a copy of the License at 

9# 

10# http://www.apache.org/licenses/LICENSE-2.0 

11# 

12# Unless required by applicable law or agreed to in writing, software 

13# distributed under the License is distributed on an "AS IS" BASIS, 

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 

15# implied. 

16# See the License for the specific language governing permissions and 

17# limitations under the License. 

18# 

19from debtcollector import removals 

20from oslo_config import cfg 

21from oslo_log import log 

22import warnings 

23 

24from watcher._i18n import _ 

25from watcher.decision_engine.strategy.strategies import base 

26 

# Module-level logger for this strategy.
LOG = log.getLogger(__name__)
# Global oslo.config configuration object.
CONF = cfg.CONF

# Show every warning occurrence instead of collapsing duplicates, so the
# deprecation warning emitted by the @removals.removed_class decorator
# below is surfaced on each use of this deprecated strategy.
warnings.simplefilter('always')

31 

32 

@removals.removed_class("NoisyNeighbor", version="2025.2",
                        removal_version="2026.2")
class NoisyNeighbor(base.NoisyNeighborBaseStrategy):
    """Noisy Neighbor strategy using live migration

    *Description*

    This strategy can identify and migrate a Noisy Neighbor -
    a low priority VM that negatively affects performance of
    a high priority VM in terms of IPC by over utilizing
    Last Level Cache.

    *Requirements*

    To enable LLC metric, latest Intel server with CMT support is required.

    *Limitations*

    This is a proof of concept that is not meant to be used in production

    *Spec URL*

    http://specs.openstack.org/openstack/watcher-specs/specs/pike/implemented/noisy_neighbor_strategy.html
    """

    # Action type added to the solution for each planned migration.
    MIGRATION = "migrate"

    # Metrics this strategy requires from the datasource backend.
    DATASOURCE_METRICS = ['instance_l3_cache_usage']

    # Priority assumed for instances whose metadata has no
    # 'watcher-priority' key.
    DEFAULT_WATCHER_PRIORITY = 5

    def __init__(self, config, osc=None):
        """:param config: strategy configuration
        :param osc: OpenStack client helper, or None to use the default
        """
        super(NoisyNeighbor, self).__init__(config, osc)

        self.meter_name = 'instance_l3_cache_usage'

    @classmethod
    def get_name(cls):
        """Return the unique internal identifier of this strategy."""
        return "noisy_neighbor"

    @classmethod
    def get_display_name(cls):
        """Return the translated human-readable strategy name."""
        return _("Noisy Neighbor")

    @classmethod
    def get_translatable_display_name(cls):
        """Return the untranslated display name (translation key)."""
        return "Noisy Neighbor"

    @classmethod
    def get_schema(cls):
        """Return the JSON schema of the strategy input parameters.

        Each element carries a mandatory default so the strategy can
        run without explicit user input.
        """
        # Mandatory default setting for each element
        return {
            "properties": {
                "cache_threshold": {
                    "description": "Performance drop in L3_cache threshold "
                                   "for migration",
                    "type": "number",
                    "default": 35.0
                },
                "period": {
                    "description": "Aggregate time period of "
                                   "ceilometer and gnocchi",
                    "type": "number",
                    "default": 100.0
                },
            },
        }

    def get_current_and_previous_cache(self, instance):
        """Fetch the L3 cache usage of *instance* for the current and
        the previous sampling period.

        The previous-period value is reconstructed from the mean over
        twice the period: mean(2p) = (curr + prev) / 2, hence
        prev = 2 * mean(2p) - curr.

        :return: (current, previous) tuple, or (None, None) when the
                 datasource lookup fails for any reason.
        """
        try:
            curr_cache = self.datasource_backend.get_instance_l3_cache_usage(
                instance, self.meter_name, self.period,
                'mean', granularity=300)
            previous_cache = 2 * (
                self.datasource_backend.get_instance_l3_cache_usage(
                    instance, self.meter_name, 2 * self.period,
                    'mean', granularity=300)) - curr_cache

        except Exception as exc:
            # Best effort: a failed metric lookup must not abort the
            # whole audit, so log it and report "no data" to the caller.
            LOG.exception(exc)
            return None, None

        return curr_cache, previous_cache

    def find_priority_instance(self, instance):
        """Return *instance* if its L3 cache usage dropped by more than
        ``cache_threshold`` percent versus the previous period (i.e. it
        looks like the victim of a noisy neighbor), else None.
        """
        current_cache, previous_cache = \
            self.get_current_and_previous_cache(instance)

        if None in (current_cache, previous_cache):
            LOG.warning("Datasource unable to pick L3 Cache "
                        "values. Skipping the instance")
            return None

        if (current_cache < (1 - (self.cache_threshold / 100.0)) *
                previous_cache):
            return instance
        else:
            return None

    def find_noisy_instance(self, instance):
        """Return *instance* if its L3 cache usage grew by more than
        ``cache_threshold`` percent versus the previous period (i.e. it
        looks like the noisy neighbor), else None.
        """
        noisy_current_cache, noisy_previous_cache = \
            self.get_current_and_previous_cache(instance)

        if None in (noisy_current_cache, noisy_previous_cache):
            LOG.warning("Datasource unable to pick "
                        "L3 Cache. Skipping the instance")
            return None

        if (noisy_current_cache > (1 + (self.cache_threshold / 100.0)) *
                noisy_previous_cache):
            return instance
        else:
            return None

    def group_hosts(self):
        """Classify the compute nodes of the model.

        :return: (hosts_need_release, hosts_target) tuple, where
                 hosts_need_release maps a node uuid to its detected
                 {'priority_vm', 'noisy_vm'} pair, and hosts_target is
                 the list of nodes usable as migration destinations.
        """
        nodes = self.compute_model.get_all_compute_nodes()
        hosts_need_release = {}
        hosts_target = []

        for node in nodes.values():
            instances_of_node = self.compute_model.get_node_instances(node)
            node_instance_count = len(instances_of_node)

            # Flag that tells us whether to skip the node or not. If True,
            # the node is skipped. Will be true if we find a noisy instance
            # or when potential priority instance will be same as potential
            # noisy instance
            loop_break_flag = False

            if node_instance_count > 1:

                instance_priority_list = list(instances_of_node)

                # If there is no metadata regarding watcher-priority, it
                # takes DEFAULT_WATCHER_PRIORITY as priority.
                # FIX: the default must be the second argument of
                # dict.get(); the previous key built a (value, constant)
                # tuple, so the default was never applied and instances
                # without the key sorted on None (a TypeError in Python 3
                # when mixed with set values).
                # NOTE(review): assumes 'watcher-priority' metadata values
                # are mutually comparable with the integer default --
                # confirm against whatever sets this metadata.
                instance_priority_list.sort(key=lambda a: a.get(
                    'metadata').get('watcher-priority',
                                    self.DEFAULT_WATCHER_PRIORITY))

                # Noisy-VM candidates are scanned from the lowest
                # priority upwards.
                instance_priority_list_reverse = list(
                    reversed(instance_priority_list))

                for potential_priority_instance in instance_priority_list:

                    priority_instance = self.find_priority_instance(
                        potential_priority_instance)

                    if priority_instance is not None:

                        for potential_noisy_instance in (
                                instance_priority_list_reverse):
                            # Stop once the two scans meet: a VM cannot
                            # be both the victim and the noisy neighbor.
                            if (potential_noisy_instance ==
                                    potential_priority_instance):
                                loop_break_flag = True
                                break

                            noisy_instance = self.find_noisy_instance(
                                potential_noisy_instance)

                            if noisy_instance is not None:
                                hosts_need_release[node.uuid] = {
                                    'priority_vm':
                                        potential_priority_instance,
                                    'noisy_vm': potential_noisy_instance}
                                LOG.debug("Priority VM found: %s",
                                          potential_priority_instance.uuid)
                                LOG.debug("Noisy VM found: %s",
                                          potential_noisy_instance.uuid)
                                loop_break_flag = True
                                break

                    # No need to check other instances in the node
                    if loop_break_flag:
                        break

            if node.uuid not in hosts_need_release:
                hosts_target.append(node)

        return hosts_need_release, hosts_target

    def filter_dest_servers(self, hosts, instance_to_migrate):
        """Return the subset of *hosts* with enough free vcpu, disk and
        memory to receive *instance_to_migrate*.
        """
        required_cores = instance_to_migrate.vcpus
        required_disk = instance_to_migrate.disk
        required_memory = instance_to_migrate.memory

        dest_servers = []
        for host in hosts:
            free_res = self.compute_model.get_node_free_resources(host)
            if (free_res['vcpu'] >= required_cores and free_res['disk'] >=
                    required_disk and free_res['memory'] >= required_memory):
                dest_servers.append(host)

        return dest_servers

    def pre_execute(self):
        """Run the common strategy pre-execution checks."""
        self._pre_execute()

    def do_execute(self, audit=None):
        """Build the solution: find one noisy VM and plan its live
        migration to the first destination host with enough capacity.
        """
        self.cache_threshold = self.input_parameters.cache_threshold
        self.period = self.input_parameters.period

        hosts_need_release, hosts_target = self.group_hosts()

        if not hosts_need_release:
            LOG.debug("No hosts require optimization")
            return

        if not hosts_target:
            LOG.debug("No hosts available to migrate")
            return

        # NOTE(review): this ranks source nodes by comparing the priority
        # VM objects themselves; presumably instance objects are orderable
        # here -- confirm, otherwise max() raises TypeError with more
        # than one candidate node.
        mig_source_node_name = max(hosts_need_release.keys(), key=lambda a:
                                   hosts_need_release[a]['priority_vm'])
        instance_to_migrate = hosts_need_release[mig_source_node_name][
            'noisy_vm']

        if instance_to_migrate is None:
            return

        dest_servers = self.filter_dest_servers(hosts_target,
                                                instance_to_migrate)

        if not dest_servers:
            LOG.info("No proper target host could be found")
            return

        # Destination node will be the first available node in the list.
        mig_destination_node = dest_servers[0]
        mig_source_node = self.compute_model.get_node_by_uuid(
            mig_source_node_name)

        # Only emit the action if the in-memory model accepts the move.
        if self.compute_model.migrate_instance(instance_to_migrate,
                                               mig_source_node,
                                               mig_destination_node):
            parameters = {'migration_type': 'live',
                          'source_node': mig_source_node.uuid,
                          'destination_node': mig_destination_node.uuid,
                          'resource_name': instance_to_migrate.name}
            self.solution.add_action(action_type=self.MIGRATION,
                                     resource_id=instance_to_migrate.uuid,
                                     input_parameters=parameters)

    def post_execute(self):
        """Attach the (possibly updated) compute model to the solution."""
        self.solution.model = self.compute_model

        LOG.debug(self.compute_model.to_string())