# Coverage for watcher/decision_engine/datasources/gnocchi.py: 70% (107 statements)
# Report generated by coverage.py v7.8.2, created at 2025-06-17 12:22 +0000
# -*- encoding: utf-8 -*-
# Copyright (c) 2017 Servionica
#
# Authors: Alexander Chadin <a.chadin@servionica.ru>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import timedelta

from gnocchiclient import exceptions as gnc_exc
from oslo_config import cfg
from oslo_log import log
from oslo_utils import timeutils

from watcher.common import clients
from watcher.decision_engine.datasources import base

# Global configuration object and module-level logger.
CONF = cfg.CONF
LOG = log.getLogger(__name__)
class GnocchiHelper(base.DataSourceBase):
    """Datasource helper that reads metric measures from Gnocchi.

    ``METRIC_MAP`` pairs the metric names used by the ``get_*`` methods
    with Gnocchi metric names; it is presumably consumed by
    ``_get_meter()``, which is not defined in this class (verify against
    ``base.DataSourceBase``).  ``query_retry()`` is likewise expected to
    come from the base class.
    """

    NAME = 'gnocchi'
    METRIC_MAP = dict(
        host_cpu_usage='compute.node.cpu.percent',
        host_ram_usage='hardware.memory.used',
        host_outlet_temp='hardware.ipmi.node.outlet_temperature',
        host_inlet_temp='hardware.ipmi.node.temperature',
        host_airflow='hardware.ipmi.node.airflow',
        host_power='hardware.ipmi.node.power',
        instance_cpu_usage='cpu',
        instance_ram_usage='memory.resident',
        instance_ram_allocated='memory',
        instance_l3_cache_usage='cpu_l3_cache',
        instance_root_disk_size='disk.root.size',
    )

    def __init__(self, osc=None):
        """Create the helper and its Gnocchi client.

        :param osc: an OpenStackClients instance; a new one is created
            when none (or a falsy value) is given
        """
        self.osc = osc or clients.OpenStackClients()
        self.gnocchi = self.osc.gnocchi()

    def check_availability(self):
        """Query the Gnocchi status and report it as a string.

        :returns: ``'available'`` when the status query yields a truthy
            result, ``'not available'`` otherwise
        """
        status = self.query_retry(self.gnocchi.status.get)
        return 'available' if status else 'not available'

    def list_metrics(self):
        """List the user's meters.

        :returns: set with the ``name`` of every metric returned by
            Gnocchi; an empty set when the query yields nothing
        """
        response = self.query_retry(f=self.gnocchi.metric.list)
        if not response:
            return set()
        return {metric['name'] for metric in response}

    def _find_compute_node(self, resource):
        """Search Gnocchi for the resource describing a compute node.

        The search matches ``original_resource_id`` against
        ``"<hostname>_<hostname>"`` and is limited to one result.  A
        warning is logged when nothing is found.

        :param resource: object exposing a ``hostname`` attribute
        :returns: the first matching Gnocchi resource, or None
        """
        original_id = "%s_%s" % (resource.hostname, resource.hostname)
        resources = self.query_retry(
            f=self.gnocchi.resource.search,
            ignored_exc=gnc_exc.NotFound,
            query={"=": {"original_resource_id": original_id}},
            limit=1)

        if not resources:
            LOG.warning("The %s resource %s could not be found",
                        self.NAME, original_id)
            return None

        return resources[0]

    def statistic_aggregation(self, resource=None, resource_type=None,
                              meter_name=None, period=300, aggregate='mean',
                              granularity=300):
        """Return the latest aggregated measure of a metric for a resource.

        The queried window covers the ``period`` seconds that end at
        ``timeutils.utcnow()``.

        :param resource: object exposing ``uuid``; ``hostname`` is also
            read for compute nodes and ``vcpus`` for ``instance_cpu_usage``
        :param resource_type: ``'compute_node'`` or ``'instance'``
        :param meter_name: metric name handed to ``_get_meter()``
        :param period: length of the query window, in seconds
        :param aggregate: aggregation method; ``'count'`` is replaced
            with ``'mean'``, and ``instance_cpu_usage`` only accepts
            ``'mean'`` / ``'rate:mean'`` (queried as ``'rate:mean'``)
        :param granularity: granularity passed to Gnocchi; also the
            divisor used when converting cpu time to a percentage
        :returns: value of the last measure (scaled for ``host_airflow``
            and ``instance_cpu_usage``), or None when the compute node is
            not found, the request is unsupported, or no measures exist
        """
        stop_time = timeutils.utcnow()
        start_time = stop_time - timedelta(seconds=int(period))

        meter = self._get_meter(meter_name)

        if aggregate == 'count':
            aggregate = 'mean'
            LOG.warning('aggregate type count not supported by gnocchi,'
                        ' replaced with mean.')

        resource_id = resource.uuid
        # NOTE(review): the lookup is kept under the compute_node branch;
        # the source listing had lost its indentation, so confirm this
        # nesting against the original file.
        if resource_type == 'compute_node':
            node = self._find_compute_node(resource)
            if node is None:
                return None
            resource_id = node['id']

        if meter_name == "instance_cpu_usage":
            if resource_type != "instance":
                LOG.warning("Unsupported resource type for metric "
                            "'instance_cpu_usage': %s", resource_type)
                return None

            # The "cpu_util" gauge (percentage) metric has been removed.
            # We're going to obtain the same result by using the rate of
            # change aggregate operation.
            if aggregate not in ("mean", "rate:mean"):
                LOG.warning("Unsupported aggregate for instance_cpu_usage "
                            "metric: %s. "
                            "Supported aggregates: mean, rate:mean ",
                            aggregate)
                return None

            # TODO(lpetrut): consider supporting other aggregates.
            aggregate = "rate:mean"

        raw_kwargs = dict(
            metric=meter,
            start=start_time,
            stop=stop_time,
            resource_id=resource_id,
            granularity=granularity,
            aggregation=aggregate,
        )
        # Only pass arguments that carry a truthy value.
        kwargs = {k: v for k, v in raw_kwargs.items() if v}

        statistics = self.query_retry(
            f=self.gnocchi.metric.get_measures,
            ignored_exc=gnc_exc.NotFound,
            **kwargs)

        if not statistics:
            return None

        # return value of latest measure
        # measure has structure [time, granularity, value]
        return_value = statistics[-1][2]

        if meter_name == 'host_airflow':
            # Airflow from hardware.ipmi.node.airflow is reported as
            # 1/10 th of actual CFM
            return_value *= 10
        if meter_name == "instance_cpu_usage":
            # "rate:mean" can return negative values for migrated vms.
            return_value = max(0, return_value)

            # We're converting the cumulative cpu time (ns) to cpu usage
            # percentage.
            vcpus = resource.vcpus
            if not vcpus:
                LOG.warning("instance vcpu count not set, assuming 1")
                vcpus = 1
            # 1e9 is the number of nanoseconds in one second.
            return_value *= 100 / (granularity * 1e9) / vcpus

        return return_value

    def statistic_series(self, resource=None, resource_type=None,
                         meter_name=None, start_time=None, end_time=None,
                         granularity=300):
        """Return the measures of a metric for a resource as a series.

        :param resource: object exposing ``uuid``; ``hostname`` is also
            read for compute nodes
        :param resource_type: ``'compute_node'`` or ``'instance'``
        :param meter_name: metric name handed to ``_get_meter()``
        :param start_time: start of the query window; omitted from the
            query when falsy
        :param end_time: end of the query window; omitted from the query
            when falsy
        :param granularity: granularity passed to Gnocchi
        :returns: dict mapping each measure's time to its value (values
            are multiplied by 10 for ``host_airflow``), or None when the
            compute node is not found or no measures exist
        """
        meter = self._get_meter(meter_name)

        resource_id = resource.uuid
        # NOTE(review): the lookup is kept under the compute_node branch;
        # the source listing had lost its indentation, so confirm this
        # nesting against the original file.
        if resource_type == 'compute_node':
            node = self._find_compute_node(resource)
            if node is None:
                return None
            resource_id = node['id']

        raw_kwargs = dict(
            metric=meter,
            start=start_time,
            stop=end_time,
            resource_id=resource_id,
            granularity=granularity,
        )
        # Only pass arguments that carry a truthy value.
        kwargs = {k: v for k, v in raw_kwargs.items() if v}

        statistics = self.query_retry(
            f=self.gnocchi.metric.get_measures,
            ignored_exc=gnc_exc.NotFound,
            **kwargs)

        if not statistics:
            return None

        # measure has structure [time, granularity, value]
        if meter_name == 'host_airflow':
            # Airflow from hardware.ipmi.node.airflow is reported as
            # 1/10 th of actual CFM
            return {s[0]: s[2] * 10 for s in statistics}
        return {s[0]: s[2] for s in statistics}

    def get_host_cpu_usage(self, resource, period, aggregate,
                           granularity=300):
        """Return the ``host_cpu_usage`` aggregate of a compute node."""
        return self.statistic_aggregation(
            resource, 'compute_node', 'host_cpu_usage', period,
            aggregate, granularity)

    def get_host_ram_usage(self, resource, period, aggregate,
                           granularity=300):
        """Return the ``host_ram_usage`` aggregate of a compute node."""
        return self.statistic_aggregation(
            resource, 'compute_node', 'host_ram_usage', period,
            aggregate, granularity)

    def get_host_outlet_temp(self, resource, period, aggregate,
                             granularity=300):
        """Return the ``host_outlet_temp`` aggregate of a compute node."""
        return self.statistic_aggregation(
            resource, 'compute_node', 'host_outlet_temp', period,
            aggregate, granularity)

    def get_host_inlet_temp(self, resource, period, aggregate,
                            granularity=300):
        """Return the ``host_inlet_temp`` aggregate of a compute node."""
        return self.statistic_aggregation(
            resource, 'compute_node', 'host_inlet_temp', period,
            aggregate, granularity)

    def get_host_airflow(self, resource, period, aggregate,
                         granularity=300):
        """Return the ``host_airflow`` aggregate of a compute node."""
        return self.statistic_aggregation(
            resource, 'compute_node', 'host_airflow', period,
            aggregate, granularity)

    def get_host_power(self, resource, period, aggregate,
                       granularity=300):
        """Return the ``host_power`` aggregate of a compute node."""
        return self.statistic_aggregation(
            resource, 'compute_node', 'host_power', period,
            aggregate, granularity)

    def get_instance_cpu_usage(self, resource, period, aggregate,
                               granularity=300):
        """Return the ``instance_cpu_usage`` aggregate of an instance."""
        return self.statistic_aggregation(
            resource, 'instance', 'instance_cpu_usage', period,
            aggregate, granularity)

    def get_instance_ram_usage(self, resource, period, aggregate,
                               granularity=300):
        """Return the ``instance_ram_usage`` aggregate of an instance."""
        return self.statistic_aggregation(
            resource, 'instance', 'instance_ram_usage', period,
            aggregate, granularity)

    def get_instance_ram_allocated(self, resource, period, aggregate,
                                   granularity=300):
        """Return the ``instance_ram_allocated`` aggregate of an instance."""
        return self.statistic_aggregation(
            resource, 'instance', 'instance_ram_allocated', period,
            aggregate, granularity)

    def get_instance_l3_cache_usage(self, resource, period, aggregate,
                                    granularity=300):
        """Return the ``instance_l3_cache_usage`` aggregate of an instance."""
        return self.statistic_aggregation(
            resource, 'instance', 'instance_l3_cache_usage', period,
            aggregate, granularity)

    def get_instance_root_disk_size(self, resource, period, aggregate,
                                    granularity=300):
        """Return the ``instance_root_disk_size`` aggregate of an instance."""
        return self.statistic_aggregation(
            resource, 'instance', 'instance_root_disk_size', period,
            aggregate, granularity)