Coverage for watcher/decision_engine/datasources/gnocchi.py: 70% of 107 statements
(coverage.py v7.8.2, report created at 2025-06-17 12:22 +0000)

# -*- encoding: utf-8 -*-
# Copyright (c) 2017 Servionica
#
# Authors: Alexander Chadin <a.chadin@servionica.ru>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

18 

19from datetime import timedelta 

20 

21from gnocchiclient import exceptions as gnc_exc 

22from oslo_config import cfg 

23from oslo_log import log 

24from oslo_utils import timeutils 

25 

26from watcher.common import clients 

27from watcher.decision_engine.datasources import base 

28 

# Global oslo.config handle and module-level logger (oslo convention).
CONF = cfg.CONF
LOG = log.getLogger(__name__)

31 

32 

class GnocchiHelper(base.DataSourceBase):
    """Datasource helper backed by the gnocchi time-series database.

    Translates Watcher's generic metric names (METRIC_MAP keys) into
    gnocchi metric names and retrieves either a single aggregated value
    (:meth:`statistic_aggregation`) or a whole time series
    (:meth:`statistic_series`) for compute nodes and instances.
    """

    NAME = 'gnocchi'

    # Watcher generic metric name -> gnocchi metric name.
    METRIC_MAP = dict(host_cpu_usage='compute.node.cpu.percent',
                      host_ram_usage='hardware.memory.used',
                      host_outlet_temp='hardware.ipmi.node.outlet_temperature',
                      host_inlet_temp='hardware.ipmi.node.temperature',
                      host_airflow='hardware.ipmi.node.airflow',
                      host_power='hardware.ipmi.node.power',
                      instance_cpu_usage='cpu',
                      instance_ram_usage='memory.resident',
                      instance_ram_allocated='memory',
                      instance_l3_cache_usage='cpu_l3_cache',
                      instance_root_disk_size='disk.root.size',
                      )

    def __init__(self, osc=None):
        """:param osc: an OpenStackClients instance"""
        self.osc = osc if osc else clients.OpenStackClients()
        self.gnocchi = self.osc.gnocchi()

    def check_availability(self):
        """Probe the gnocchi status endpoint.

        :return: 'available' when the status API answered, otherwise
                 'not available'
        """
        status = self.query_retry(self.gnocchi.status.get)
        if status:
            return 'available'
        else:
            return 'not available'

    def list_metrics(self):
        """List the user's meters.

        :return: set of metric names known to gnocchi (empty set when the
                 query failed or returned nothing)
        """
        response = self.query_retry(f=self.gnocchi.metric.list)
        if not response:
            return set()
        # Names only; duplicates across resources collapse in the set.
        return {metric['name'] for metric in response}

    def _resolve_resource_id(self, resource, resource_type):
        """Map a Watcher resource to its gnocchi internal resource id.

        Shared by statistic_aggregation and statistic_series, which
        previously duplicated this lookup.

        :param resource: compute node or instance model element
        :param resource_type: 'compute_node' or 'instance'
        :return: the gnocchi resource id, or None (after logging a
                 warning) when gnocchi does not know the resource
        """
        resource_id = resource.uuid
        if resource_type == 'compute_node':
            # Compute nodes are registered in gnocchi under an
            # original_resource_id of "<hostname>_<hostname>".
            resource_id = "%s_%s" % (resource.hostname, resource.hostname)
        kwargs = dict(query={"=": {"original_resource_id": resource_id}},
                      limit=1)
        resources = self.query_retry(
            f=self.gnocchi.resource.search,
            ignored_exc=gnc_exc.NotFound,
            **kwargs)

        if not resources:
            LOG.warning("The %s resource %s could not be found",
                        self.NAME, resource_id)
            return None

        return resources[0]['id']

    def statistic_aggregation(self, resource=None, resource_type=None,
                              meter_name=None, period=300, aggregate='mean',
                              granularity=300):
        """Return a single aggregated value for the given meter.

        :param resource: resource model element to query
        :param resource_type: 'compute_node' or 'instance'
        :param meter_name: generic Watcher metric name (METRIC_MAP key)
        :param period: length of the query window in seconds, ending now
        :param aggregate: aggregation method; 'count' is not supported by
                          gnocchi and silently falls back to 'mean'
        :param granularity: gnocchi granularity in seconds
        :return: value of the latest measure, or None when the resource
                 or measures could not be found
        """
        stop_time = timeutils.utcnow()
        start_time = stop_time - timedelta(seconds=(int(period)))

        meter = self._get_meter(meter_name)

        if aggregate == 'count':
            # gnocchi has no 'count' aggregate; degrade gracefully.
            aggregate = 'mean'
            LOG.warning('aggregate type count not supported by gnocchi,'
                        ' replaced with mean.')

        resource_id = self._resolve_resource_id(resource, resource_type)
        if resource_id is None:
            return

        if meter_name == "instance_cpu_usage":
            if resource_type != "instance":
                LOG.warning("Unsupported resource type for metric "
                            "'instance_cpu_usage': %s", resource_type)
                return

            # The "cpu_util" gauge (percentage) metric has been removed.
            # We're going to obtain the same result by using the rate of
            # change aggregate operation.
            if aggregate not in ("mean", "rate:mean"):
                LOG.warning("Unsupported aggregate for instance_cpu_usage "
                            "metric: %s. "
                            "Supported aggregates: mean, rate:mean ",
                            aggregate)
                return

            # TODO(lpetrut): consider supporting other aggregates.
            aggregate = "rate:mean"

        raw_kwargs = dict(
            metric=meter,
            start=start_time,
            stop=stop_time,
            resource_id=resource_id,
            granularity=granularity,
            aggregation=aggregate,
        )

        # Drop unset (falsy) arguments before calling the client.
        kwargs = {k: v for k, v in raw_kwargs.items() if k and v}

        statistics = self.query_retry(
            f=self.gnocchi.metric.get_measures,
            ignored_exc=gnc_exc.NotFound,
            **kwargs)

        return_value = None
        if statistics:
            # return value of latest measure
            # measure has structure [time, granularity, value]
            return_value = statistics[-1][2]

            if meter_name == 'host_airflow':
                # Airflow from hardware.ipmi.node.airflow is reported as
                # 1/10 th of actual CFM
                return_value *= 10
            if meter_name == "instance_cpu_usage":
                # "rate:mean" can return negative values for migrated vms.
                return_value = max(0, return_value)

                # We're converting the cumulative cpu time (ns) to cpu usage
                # percentage: 10e+8 is the ns-per-second factor, scaled by
                # the granularity window and normalized by the vcpu count.
                vcpus = resource.vcpus
                if not vcpus:
                    LOG.warning("instance vcpu count not set, assuming 1")
                    vcpus = 1
                return_value *= 100 / (granularity * 10e+8) / vcpus

        return return_value

    def statistic_series(self, resource=None, resource_type=None,
                         meter_name=None, start_time=None, end_time=None,
                         granularity=300):
        """Return a {timestamp: value} series for the given meter.

        :param resource: resource model element to query
        :param resource_type: 'compute_node' or 'instance'
        :param meter_name: generic Watcher metric name (METRIC_MAP key)
        :param start_time: series start, passed through to gnocchi
        :param end_time: series end, passed through to gnocchi
        :param granularity: gnocchi granularity in seconds
        :return: dict mapping measure timestamps to values, or None when
                 the resource or measures could not be found
        """
        meter = self._get_meter(meter_name)

        resource_id = self._resolve_resource_id(resource, resource_type)
        if resource_id is None:
            return

        raw_kwargs = dict(
            metric=meter,
            start=start_time,
            stop=end_time,
            resource_id=resource_id,
            granularity=granularity,
        )

        # Drop unset (falsy) arguments before calling the client.
        kwargs = {k: v for k, v in raw_kwargs.items() if k and v}

        statistics = self.query_retry(
            f=self.gnocchi.metric.get_measures,
            ignored_exc=gnc_exc.NotFound,
            **kwargs)

        return_value = None
        if statistics:
            # measure has structure [time, granularity, value]
            if meter_name == 'host_airflow':
                # Airflow from hardware.ipmi.node.airflow is reported as
                # 1/10 th of actual CFM
                return_value = {s[0]: s[2]*10 for s in statistics}
            else:
                return_value = {s[0]: s[2] for s in statistics}

        return return_value

    def get_host_cpu_usage(self, resource, period, aggregate,
                           granularity=300):
        return self.statistic_aggregation(
            resource, 'compute_node', 'host_cpu_usage', period,
            aggregate, granularity)

    def get_host_ram_usage(self, resource, period, aggregate,
                           granularity=300):
        return self.statistic_aggregation(
            resource, 'compute_node', 'host_ram_usage', period,
            aggregate, granularity)

    def get_host_outlet_temp(self, resource, period, aggregate,
                             granularity=300):
        return self.statistic_aggregation(
            resource, 'compute_node', 'host_outlet_temp', period,
            aggregate, granularity)

    def get_host_inlet_temp(self, resource, period, aggregate,
                            granularity=300):
        return self.statistic_aggregation(
            resource, 'compute_node', 'host_inlet_temp', period,
            aggregate, granularity)

    def get_host_airflow(self, resource, period, aggregate,
                         granularity=300):
        return self.statistic_aggregation(
            resource, 'compute_node', 'host_airflow', period,
            aggregate, granularity)

    def get_host_power(self, resource, period, aggregate,
                       granularity=300):
        return self.statistic_aggregation(
            resource, 'compute_node', 'host_power', period,
            aggregate, granularity)

    def get_instance_cpu_usage(self, resource, period, aggregate,
                               granularity=300):
        return self.statistic_aggregation(
            resource, 'instance', 'instance_cpu_usage', period,
            aggregate, granularity)

    def get_instance_ram_usage(self, resource, period, aggregate,
                               granularity=300):
        return self.statistic_aggregation(
            resource, 'instance', 'instance_ram_usage', period,
            aggregate, granularity)

    def get_instance_ram_allocated(self, resource, period, aggregate,
                                   granularity=300):
        return self.statistic_aggregation(
            resource, 'instance', 'instance_ram_allocated', period,
            aggregate, granularity)

    def get_instance_l3_cache_usage(self, resource, period, aggregate,
                                    granularity=300):
        return self.statistic_aggregation(
            resource, 'instance', 'instance_l3_cache_usage', period,
            aggregate, granularity)

    def get_instance_root_disk_size(self, resource, period, aggregate,
                                    granularity=300):
        return self.statistic_aggregation(
            resource, 'instance', 'instance_root_disk_size', period,
            aggregate, granularity)