Coverage for watcher/decision_engine/scoring/dummy_scorer.py: 91%

56 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-17 12:22 +0000

1# -*- encoding: utf-8 -*- 

2# Copyright (c) 2016 Intel 

3# 

4# Authors: Tomasz Kaczynski <tomasz.kaczynski@intel.com> 

5# 

6# Licensed under the Apache License, Version 2.0 (the "License"); 

7# you may not use this file except in compliance with the License. 

8# You may obtain a copy of the License at 

9# 

10# http://www.apache.org/licenses/LICENSE-2.0 

11# 

12# Unless required by applicable law or agreed to in writing, software 

13# distributed under the License is distributed on an "AS IS" BASIS, 

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 

15# implied. 

16# See the License for the specific language governing permissions and 

17# limitations under the License. 

18 

19from oslo_log import log 

20from oslo_serialization import jsonutils 

21from oslo_utils import units 

22 

23from watcher._i18n import _ 

24from watcher.decision_engine.scoring import base 

25 

26LOG = log.getLogger(__name__) 

27 

28 

class DummyScorer(base.ScoringEngine):
    """Sample Scoring Engine implementing simplified workload classification.

    Typically a scoring engine would be implemented using machine learning
    techniques. For example, for workload classification problem the solution
    could consist of the following steps:

    1. Define a problem to solve: we want to detect the workload on the
       machine based on the collected metrics like power consumption,
       temperature, CPU load, memory usage, disk usage, network usage, etc.
    2. The workloads could be predefined, e.g. IDLE, CPU-INTENSIVE,
       MEMORY-INTENSIVE, IO-BOUND, ...
       Or we could let the ML algorithm to find the workloads based on the
       learning data provided. The decision here leads to learning algorithm
       used (supervised vs. non-supervised learning).
    3. Collect metrics from sample servers (learning data).
    4. Define the analytical model, pick ML framework and algorithm.
    5. Apply learning data to the data model. Once taught, the data model
       becomes a scoring engine and can start doing predictions or
       classifications.
    6. Wrap up the scoring engine with the class like this one, so it has a
       standard interface and can be used inside Watcher.

    This class is a greatly simplified version of the above model. The
    goal is to provide an example how such class could be implemented and used
    in Watcher, without adding additional dependencies like machine learning
    frameworks (which can be quite heavy) or over-complicating its internal
    implementation, which can distract from looking at the overall picture.

    That said, this class implements a workload classification "manually"
    (in plain python code) and is not intended to be used in production.
    """

    # Constants defining column indices for the input data
    PROCESSOR_TIME_PERC = 0
    MEM_TOTAL_BYTES = 1
    MEM_AVAIL_BYTES = 2
    MEM_PAGE_READS_PER_SEC = 3
    MEM_PAGE_WRITES_PER_SEC = 4
    DISK_READ_BYTES_PER_SEC = 5
    DISK_WRITE_BYTES_PER_SEC = 6
    NET_BYTES_RECEIVED_PER_SEC = 7
    NET_BYTES_SENT_PER_SEC = 8

    # Number of feature columns expected in the input (see get_metainfo)
    EXPECTED_NUM_FEATURES = 9

    # Types of workload
    WORKLOAD_IDLE = 0
    WORKLOAD_CPU = 1
    WORKLOAD_MEM = 2
    WORKLOAD_DISK = 3

    def get_name(self):
        """Return the unique identifier of this scoring engine."""
        return 'dummy_scorer'

    def get_description(self):
        """Return a human-readable description of this scoring engine."""
        return 'Dummy workload classifier'

    def get_metainfo(self):
        """Metadata about input/output format of this scoring engine.

        This information is used in strategy using this scoring engine to
        prepare the input information and to understand the results.

        :returns: a JSON string describing the feature columns expected by
            :meth:`calculate_score`, the result columns it produces and the
            workload labels matching the ``workload`` result value.
        """

        return """{
            "feature_columns": [
                "proc-processor-time-%",
                "mem-total-bytes",
                "mem-avail-bytes",
                "mem-page-reads/sec",
                "mem-page-writes/sec",
                "disk-read-bytes/sec",
                "disk-write-bytes/sec",
                "net-bytes-received/sec",
                "net-bytes-sent/sec"],
            "result_columns": [
                "workload",
                "idle-probability",
                "cpu-probability",
                "memory-probability",
                "disk-probability"],
            "workloads": [
                "idle",
                "cpu-intensive",
                "memory-intensive",
                "disk-intensive"]
            }"""

    def calculate_score(self, features):
        """Arbitrary algorithm calculating the score.

        It demonstrates how to parse the input data (features) and serialize
        the results. It detects the workload type based on the metrics and
        also returns the probabilities of each workload detection (again,
        the arbitrary values are returned, just for demonstration how the
        "real" machine learning algorithm could work. For example, the
        Gradient Boosting Machine from H2O framework is using exactly the
        same format:
        http://www.h2o.ai/verticals/algos/gbm/

        :param features: JSON string with a list of 9 numeric feature
            values, ordered as described by :meth:`get_metainfo`.
        :returns: JSON string with a 5-element list:
            ``[workload, idle_prob, cpu_prob, mem_prob, disk_prob]``.
        :raises ValueError: if ``features`` is not valid JSON, is not a
            list, or does not contain exactly 9 elements.
        """

        LOG.debug('Calculating score, features: %s', features)

        # By default IDLE workload will be returned
        workload = self.WORKLOAD_IDLE
        idle_prob = 0.0
        cpu_prob = 0.0
        mem_prob = 0.0
        disk_prob = 0.0

        # Basic input validation
        try:
            flist = jsonutils.loads(features)
        except Exception as e:
            # NOTE: the original format string lacked a '%s' placeholder,
            # which made the '%' operation itself raise a TypeError instead
            # of producing the intended ValueError message.
            raise ValueError(_('Unable to parse features: %s') % e)
        if not isinstance(flist, list):
            raise ValueError(_('JSON list expected in feature argument'))
        if len(flist) != self.EXPECTED_NUM_FEATURES:
            raise ValueError(_('Invalid number of features, expected 9'))

        # Simple logic for workload classification: a single dominant
        # signal wins outright (probability 100), otherwise the sample is
        # classified as idle with optional 50% hints for borderline load.
        if flist[self.PROCESSOR_TIME_PERC] >= 80:
            workload = self.WORKLOAD_CPU
            cpu_prob = 100.0
        elif flist[self.MEM_PAGE_READS_PER_SEC] >= 1000 \
                and flist[self.MEM_PAGE_WRITES_PER_SEC] >= 1000:
            workload = self.WORKLOAD_MEM
            mem_prob = 100.0
        elif flist[self.DISK_READ_BYTES_PER_SEC] >= 50*units.Mi \
                and flist[self.DISK_WRITE_BYTES_PER_SEC] >= 50*units.Mi:
            workload = self.WORKLOAD_DISK
            disk_prob = 100.0
        else:
            idle_prob = 100.0
            if flist[self.PROCESSOR_TIME_PERC] >= 40:
                cpu_prob = 50.0
            if flist[self.MEM_PAGE_READS_PER_SEC] >= 500 \
                    or flist[self.MEM_PAGE_WRITES_PER_SEC] >= 500:
                mem_prob = 50.0

        return jsonutils.dumps(
            [workload, idle_prob, cpu_prob, mem_prob, disk_prob])