mageec  0.1.0
MAchine Guided Energy Efficient Compilation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
feature-extract.cpp
Go to the documentation of this file.
1 /* GIMPLE Feature Extractor
2  Copyright (C) 2013, 2014 Embecosm Limited and University of Bristol
3 
4  This file is part of MAGEEC.
5 
6  This program is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 
20 /* We need to undefine these as gcc-plugin.h redefines them */
21 #undef PACKAGE_BUGREPORT
22 #undef PACKAGE_NAME
23 #undef PACKAGE_STRING
24 #undef PACKAGE_TARNAME
25 #undef PACKAGE_VERSION
26 
27 #ifndef BUILDING_GCC_VERSION
28  #include "bversion.h"
29  #define GCC_VERSION BUILDING_GCC_VERSION
30 #endif
31 #if (BUILDING_GCC_VERSION < 4009)
32  #include "gcc-plugin.h"
33  #include "tree-pass.h"
34  #include "gimple.h"
35  #include "function.h"
36  #include "toplev.h"
37 #else
38  #include "gcc-plugin.h"
39  #include "config.h"
40  #include "system.h"
41  #include "coretypes.h"
42  #include "tm.h"
43  #include "tree.h"
44  #include "stringpool.h"
45  #include "toplev.h"
46  #include "basic-block.h"
47  #include "pointer-set.h"
48  #include "hash-table.h"
49  #include "vec.h"
50  #include "ggc.h"
51  #include "basic-block.h"
52  #include "tree-ssa-alias.h"
53  #include "internal-fn.h"
54  #include "gimple-fold.h"
55  #include "tree-eh.h"
56  #include "gimple-expr.h"
57  #include "is-a.h"
58  #include "gimple.h"
59  #include "gimple-iterator.h"
60  #include "tree.h"
61  #include "tree-pass.h"
62  #include "intl.h"
63  #include "diagnostic.h"
64  #include "context.h"
65 #endif
66 
67 /* MAGEEC Headers */
68 #include "mageec-plugin.h"
69 #include "mageec/vectormath.h"
70 #include <iostream>
71 #include <vector>
72 
73 using namespace mageec;
74 
79 static bool mageec_featextract_gate(void)
80 {
81  return true;
82 }
83 
87 static unsigned mageec_featextract_exec(void)
88 {
89  basic_block bb;
90  gimple_stmt_iterator gsi;
91  edge e;
92  edge_iterator ei;
93 
94  // Variables for holding feature information
95  std::vector<unsigned> insn_counts;
96  unsigned bb_count = 0; // ft1
97  unsigned bb_single_successor = 0; // ft2
98  unsigned bb_two_successors = 0; // ft3
99  unsigned bb_gt2_successors = 0; // ft4
100  unsigned bb_single_predecessor = 0; // ft5
101  unsigned bb_two_predecessors = 0; // ft6
102  unsigned bb_gt2_predecessors = 0; // ft7
103  unsigned bb_1pred_1succ = 0; // ft8
104  unsigned bb_1pred_2succ = 0; // ft9
105  unsigned bb_2pred_1succ = 0; //ft10
106  unsigned bb_2pred_2succ = 0; //ft11
107  unsigned bb_gt2pred_gt2succ = 0; //ft12
108  unsigned insn_count_lt15 = 0; //ft13
109  unsigned insn_count_15_to_500 = 0; //ft14
110  unsigned insn_count_gt500 = 0; //ft15
111  unsigned edges_in_cfg = 0; //ft16
112  unsigned edges_critical = 0; //ft17
113  unsigned edges_abnormal = 0; //ft18
114  unsigned method_cond_stmt = 0; //ft19
115  unsigned call_direct = 0; //ft20
116  unsigned method_assignments = 0; //ft21
117  unsigned int_operations = 0; //ft22
118  unsigned float_operations = 0; //ft23
119  unsigned average_phi_node_head = 0; //ft26
120  unsigned average_phi_args = 0; //ft27
121  unsigned bb_phi_count_0 = 0; //ft28
122  unsigned bb_phi_count_0_to_3 = 0; //ft29
123  unsigned bb_phi_count_gt3 = 0; //ft30
124  unsigned bb_phi_args_gt5 = 0; //ft31
125  unsigned bb_phi_args_1_to_5 = 0; //ft32
126  unsigned method_switch_stmt = 0; //ft33
127  unsigned method_unary_ops = 0; //ft34
128  unsigned pointer_arith = 0; //ft35
129  unsigned call_indirect = 0; //ft39
130  unsigned call_ptr_arg = 0; //ft42
131  unsigned call_gt4_args = 0; //ft43
132  unsigned call_ret_float = 0; //ft44
133  unsigned call_ret_int = 0; //ft45
134  unsigned uncond_branches = 0; //ft46
135 
136  // Temporaries
137  unsigned stmt_count = 0;
138  unsigned phi_nodes = 0;
139  unsigned phi_args = 0;
140  bool in_phi_header = false; //switch for ft26
141  unsigned phi_header_nodes = 0; //total for ft26
142  unsigned total_phi_args = 0; //total for ft27
143  unsigned total_phi_nodes = 0; //divisor for ft27
144 
145 #if (GCC_VERSION < 4009)
146  FOR_EACH_BB (bb)
147 #else
148  FOR_ALL_BB_FN (bb, cfun)
149 #endif
150  {
151  stmt_count = 0;
152  phi_nodes = 0;
153  phi_args = 0;
154  in_phi_header = true;
155 
156  // For this block count instructions and types
157  bb_count++;
158  for (gsi=gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi))
159  {
160  gimple stmt = gsi_stmt (gsi);
161  stmt_count++;
162  // Assignment analysis
163  if (is_gimple_assign (stmt))
164  {
165  method_assignments++;
166  enum gimple_rhs_class grhs_class =
167  get_gimple_rhs_class (gimple_expr_code (stmt));
168  if (grhs_class == GIMPLE_UNARY_RHS)
169  method_unary_ops++;
170  if (grhs_class == GIMPLE_BINARY_RHS)
171  {
172  tree arg1 = gimple_assign_rhs1 (stmt);
173  tree arg2 = gimple_assign_rhs2 (stmt);
174  if (FLOAT_TYPE_P (TREE_TYPE (arg1)))
175  float_operations++;
176  else if (INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
177  int_operations++;
178  //FIXME: Is this correct for detecting pointer arith?
179  if (POINTER_TYPE_P (TREE_TYPE (arg1)) ||
180  POINTER_TYPE_P (TREE_TYPE (arg2)))
181  pointer_arith++;
182  }
183  }
184 
185  // Phi Analysis
186  if (gimple_code (stmt) == GIMPLE_PHI)
187  {
188  phi_nodes++;
189  total_phi_nodes++;
190  if (in_phi_header)
191  phi_header_nodes++;
192  phi_args += gimple_phi_num_args (stmt);
193  total_phi_args += gimple_phi_num_args (stmt);
194  }
195  else
196  in_phi_header = false;
197  if (gimple_code (stmt) == GIMPLE_SWITCH)
198  method_switch_stmt++;
199 
200  // Call analysis
201  if (is_gimple_call (stmt))
202  {
203  if (gimple_call_num_args (stmt) > 4)
204  call_gt4_args++;
205  // Check if call has a pointer argument
206  bool call_has_ptr = false;
207  for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
208  {
209  tree arg = gimple_call_arg (stmt, i);
210  if (POINTER_TYPE_P (TREE_TYPE (arg)))
211  call_has_ptr = true;
212  }
213  if (call_has_ptr)
214  call_ptr_arg++;
215  // Get current statement, if not null, then direct
216  tree call_fn = gimple_call_fndecl (stmt);
217  if (call_fn)
218  call_direct++;
219  else
220  call_indirect++;
221  tree call_ret = gimple_call_lhs (stmt);
222  if(call_ret)
223  {
224  if (FLOAT_TYPE_P (TREE_TYPE (call_ret)))
225  call_ret_float++;
226  else if (INTEGRAL_TYPE_P (TREE_TYPE (call_ret)))
227  call_ret_int++;
228  }
229  }
230 
231  if (gimple_code (stmt) == GIMPLE_COND)
232  method_cond_stmt++;
233  } // end gimple iterator
234 
235  // Successor/predecessor information
236  if (EDGE_COUNT(bb->succs) == 1)
237  bb_single_successor++;
238  else if (EDGE_COUNT(bb->succs) == 2)
239  bb_two_successors++;
240  else if (EDGE_COUNT(bb->succs) > 2)
241  bb_gt2_successors++;
242  if (EDGE_COUNT(bb->preds) == 1)
243  bb_single_predecessor++;
244  else if (EDGE_COUNT(bb->preds) == 2)
245  bb_two_predecessors++;
246  else if (EDGE_COUNT(bb->preds) > 2)
247  bb_gt2_predecessors++;
248  if ((EDGE_COUNT(bb->preds) == 1) && (EDGE_COUNT(bb->succs) == 1))
249  bb_1pred_1succ++;
250  if ((EDGE_COUNT(bb->preds) == 1) && (EDGE_COUNT(bb->succs) == 2))
251  bb_1pred_2succ++;
252  if ((EDGE_COUNT(bb->preds) == 2) && (EDGE_COUNT(bb->succs) == 1))
253  bb_2pred_1succ++;
254  if ((EDGE_COUNT(bb->preds) == 2) && (EDGE_COUNT(bb->succs) == 2))
255  bb_2pred_2succ++;
256  if ((EDGE_COUNT(bb->preds) > 2) && (EDGE_COUNT(bb->succs) > 2))
257  bb_gt2pred_gt2succ++;
258 
259  // CFG information
260  FOR_EACH_EDGE (e, ei, bb->succs)
261  {
262  edges_in_cfg++;
263  if (EDGE_CRITICAL_P (e))
264  edges_critical++;
265  if (e->flags & EDGE_ABNORMAL)
266  edges_abnormal++;
267  }
268 
269 
270  // Store processed data about this block
271  insn_counts.push_back(stmt_count);
272  if (stmt_count < 15)
273  insn_count_lt15++;
274  else if (stmt_count > 500)
275  insn_count_gt500++;
276  else
277  insn_count_15_to_500++;
278 
279  if (phi_nodes == 0)
280  bb_phi_count_0++;
281  if (phi_nodes <= 3)
282  bb_phi_count_0_to_3++;
283  else
284  bb_phi_count_gt3++;
285  if (phi_args > 5)
286  bb_phi_args_gt5++;
287  else if ((phi_args >= 1) && (phi_args <= 5))
288  bb_phi_args_1_to_5++;
289  } /* end of basic block */
290 
291  // Calculate averages once totals have been collected
292  if (total_phi_nodes > 0)
293  average_phi_args = total_phi_args / total_phi_nodes;
294  if (bb_count > 0)
295  average_phi_node_head = phi_header_nodes / bb_count;
296 
297  /* Build feature vector to pass to machine learner */
298  std::vector<mageec::mageec_feature*> features;
299  features.push_back(new basic_feature("ft1", "Basic Block Count", bb_count));
300  features.push_back(new basic_feature("ft2", "BB with 1 successor", bb_single_successor));
301  features.push_back(new basic_feature("ft3", "BB with 2 successor", bb_two_successors));
302  features.push_back(new basic_feature("ft4", "BB with > 2 successor", bb_gt2_successors));
303  features.push_back(new basic_feature("ft5", "BB with 1 predecessor", bb_single_predecessor));
304  features.push_back(new basic_feature("ft6", "BB with 2 predecessor", bb_two_predecessors));
305  features.push_back(new basic_feature("ft7", "BB with > 2 predecesso", bb_gt2_predecessors));
306  features.push_back(new basic_feature("ft8", "BB with 1 pred 1 succ", bb_1pred_1succ));
307  features.push_back(new basic_feature("ft9", "BB with 1 pred 2 succ", bb_1pred_2succ));
308  features.push_back(new basic_feature("ft10", "BB with 2 pred 1 succ", bb_2pred_1succ));
309  features.push_back(new basic_feature("ft11", "BB with 2 pred 2 succ", bb_2pred_2succ));
310  features.push_back(new basic_feature("ft12", "BB with >2 pred >2 suc", bb_gt2pred_gt2succ));
311  features.push_back(new basic_feature("ft13", "BB with insn < 15", insn_count_lt15));
312  features.push_back(new basic_feature("ft14", "BB with insn [15, 500]", insn_count_15_to_500));
313  features.push_back(new basic_feature("ft15", "BB with insn > 500", insn_count_gt500));
314  features.push_back(new basic_feature("ft16", "Edges in CFG", edges_in_cfg));
315  features.push_back(new basic_feature("ft17", "Critical Edges in CFG", edges_critical));
316  features.push_back(new basic_feature("ft18", "Abnormal Edges in CFG", edges_abnormal));
317  features.push_back(new basic_feature("ft19", "Conditional Statements", method_cond_stmt));
318  features.push_back(new basic_feature("ft20", "Direct Calls", call_direct));
319  features.push_back(new basic_feature("ft21", "Assignments in method", method_assignments));
320  features.push_back(new basic_feature("ft22", "Integer operations", int_operations));
321  features.push_back(new basic_feature("ft23", "Floating Point Operations", float_operations));
322  features.push_back(new basic_feature("ft24", "Total Statement in BB", vector_sum(insn_counts)));
323  features.push_back(new basic_feature("ft25", "Avg Statement in BB", vector_sum(insn_counts)/bb_count));
324  features.push_back(new basic_feature("ft26", "Avg phis at top of BB", average_phi_node_head));
325  features.push_back(new basic_feature("ft27", "Average phi arg count", average_phi_args));
326  features.push_back(new basic_feature("ft28", "BB with 0 phis", bb_phi_count_0));
327  features.push_back(new basic_feature("ft29", "BB with [0, 3] phis", bb_phi_count_0_to_3));
328  features.push_back(new basic_feature("ft30", "BB with > 3 phis", bb_phi_count_gt3));
329  features.push_back(new basic_feature("ft31", "BB phis with > 5 args", bb_phi_args_gt5));
330  features.push_back(new basic_feature("ft32", "BB phis with [1,5] arg", bb_phi_args_1_to_5));
331  features.push_back(new basic_feature("ft33", "Switch stmts in method", method_switch_stmt));
332  features.push_back(new basic_feature("ft34", "Unary ops in method", method_unary_ops));
333  features.push_back(new basic_feature("ft35", "Pointer Arithmetic", pointer_arith));
334  features.push_back(new basic_feature("ft39", "Indirect Calls", call_indirect));
335  features.push_back(new basic_feature("ft42", "Calls with pointer args", call_ptr_arg));
336  features.push_back(new basic_feature("ft43", "Calls with > 4 args", call_gt4_args));
337  features.push_back(new basic_feature("ft44", "Calls that return float", call_ret_float));
338  features.push_back(new basic_feature("ft45", "Calls that return int", call_ret_int));
339  features.push_back(new basic_feature("ft46", "Unconditional branches", uncond_branches));
340 
341  mageec_inst.take_features(current_function_name(), features);
342 
343  // Print feature vector, first as a list and then as JSON
344  std::cerr << "Current Function: " << current_function_name() << std::endl;
345  mageec_feature::dump_vector(features, std::cerr, false);
346  mageec_feature::dump_vector(features, std::cerr, true);
347 
348  return 0;
349 }
350 
354 #if (GCC_VERSION < 4009)
355 static struct gimple_opt_pass mageec_featextract =
356 {
357  {
358  GIMPLE_PASS,
359  "mageec-extractor", /* name */
360  OPTGROUP_NONE, /* optinfo_flags */
361  mageec_featextract_gate, /* gate */
362  mageec_featextract_exec, /* execute */
363  NULL, /* sub */
364  NULL, /* next */
365  0, /* static_pass_number */
366  TV_NONE, /* tv_id */
367  PROP_ssa, /* properties_required */
368  0, /* properties_provided */
369  0, /* properties_destroyed */
370  0, /* todo_flags_start */
371  0 /* todo_flags_finish */
372  }
373 };
374 #else
375 namespace {
376 
377 const pass_data pass_data_mageec_featextract =
378 {
379  GIMPLE_PASS, /* type */
380  "mageec-extractor", /* name */
381  OPTGROUP_NONE, /* optinfo_flags */
382  true, /* has_gate */
383  true, /* has_execute */
384  TV_NONE, /* tv_id */
385  PROP_ssa, /* properties_required */
386  0, /* properties_provided */
387  0, /* properties_destroyed */
388  0, /* todo_flags_start */
389  0, /* todo_flags_finish */
390 };
391 
392 class mageec_featpass : public gimple_opt_pass
393 {
394 public:
395  mageec_featpass(gcc::context *ctxt)
396  : gimple_opt_pass(pass_data_mageec_featextract, ctxt)
397  {}
398 
399  /* opt_pass methods: */
400  bool gate () { return mageec_featextract_gate (); }
401  unsigned int execute () { return mageec_featextract_exec (); }
402 
403 }; // class mageec_featpass
404 
405 }
406 
407 static gimple_opt_pass *
408 make_mageec_pass (gcc::context *ctxt)
409 {
410  return new mageec_featpass (ctxt);
411 }
412 #endif
413 
419 {
420  struct register_pass_info pass;
421 
422 #if (GCC_VERSION < 4009)
423  pass.pass = &mageec_featextract.pass;
424 #else
425  pass.pass = make_mageec_pass (g);
426 #endif
427  pass.reference_pass_name = "ssa";
428  pass.ref_pass_instance_number = 1;
429  pass.pos_op = PASS_POS_INSERT_AFTER;
430 
431  register_callback (mageec_gcc_plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
432  &pass);
433 }