mageec  0.1.0
MAchine Guided Energy Efficient Compilation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
ml.cpp
Go to the documentation of this file.
1 /* MAGEEC Machine Learner
2  Copyright (C) 2013, 2014 Embecosm Limited and University of Bristol
3 
4  This file is part of MAGEEC.
5 
6  This program is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 
20 #include "mageec/mageec-ml.h"
21 #include <iostream>
22 #include <fstream>
23 #include <cassert>
24 #include <cstdio>
25 #include <cstdlib>
26 
27 using namespace mageec;
28 
// Entry point of the modified C5 classifier.  The classifier is currently
// built as C, so it is declared here with C linkage.
extern "C" int c5_classify_main(char *filestem);
33 
34 int mageec_ml::init (std::string dbfilename)
35 {
36  std::cerr << "LEARNER: Hello!" << std::endl;
37  /* FIXME: The second parameter should be false, we do not want to be creating
38  a new database here */
39  db = new database(dbfilename, true);
40  known_passes = db->get_pass_list();
41  return 0;
42 }
43 
44 int mageec_ml::init (std::string compiler_version,
45  std::string compiler_target)
46 {
47  return init (compiler_version + "-" + compiler_target + ".db");
48 }
49 
51 {
52  std::cerr << "LEARNER: New file" << std::endl;
53 }
54 
56 {
57  std::cerr << "LEARNER: End file" << std::endl;
58 }
59 
60 void mageec_ml::finish (void)
61 {
62  std::cerr << "LEARNER: Goodbye!" << std::endl;
63  if (db)
64  delete db;
65 }
66 
67 std::vector<mageec_pass*> mageec_ml::all_passes (void)
68 {
69  return known_passes;
70 }
71 
72 void mageec_ml::add_result (std::vector<mageec_feature*> features,
73  std::vector<mageec_pass*> passes,
74  int64_t metric,
75  bool good __attribute__((unused)))
76 {
77  if (!db)
78  return;
79 
80  result res;
81  res.passlist = passes,
82  res.featlist = features,
83  res.progname = "",
84  res.metric = metric;
85 
86  db->add_result(res);
87 }
88 
90 {
91  if (!db)
92  return;
93  db->add_result(res);
94 }
95 
97  std::vector<mageec_feature*> features)
98 {
99  assert(pass != NULL && "Decision of null pass requested.");
100  // We cannot make a decision if we don't have a database backend
101  if (!db)
102  return NATIVE_DECISION;
103 
104  unsigned long featurecount = features.size();
105  assert(featurecount > 0 && "Empty feature vector.");
106  const char *passname = pass->name().c_str();
107 
108  // namebuf will be used as a buffer to generate file names
109  char namebuf[1024];
110 
111  // "tree" file - Learnt machine learning tree (and return if no tree)
112  const char *treedata = db->get_pass_blob (pass->name());
113  if (treedata == NULL)
114  return NATIVE_DECISION;
115 
116  snprintf (namebuf, 1024, "/tmp/%s.tree", passname);
117  std::ofstream treefile(namebuf);
118  treefile << treedata;
119  treefile.close();
120 
121  // "names" file - Columns for classifier
122  snprintf (namebuf, 1024, "/tmp/%s.names", passname);
123  std::ofstream namefile(namebuf);
124  namefile << "runpass." << std::endl;
125  for (unsigned long i = 0; i < featurecount; i++)
126  namefile << features[i]->name() << ": continuous." << std::endl;
127  namefile << "runpass: t,f" << std::endl;;
128  namefile.close();
129 
130  // "cases" file - Data to make decision on (i.e. our function)
131  snprintf (namebuf, 1024, "/tmp/%s.cases", passname);
132  std::ofstream casesfile(namebuf);
133  for (unsigned int i = 0; i < featurecount; i++)
134  casesfile << features[i]->get_feature() << ',';
135  casesfile << '?' << std::endl;
136  casesfile.close();
137 
138  // Classify program as run/don't run
139  snprintf (namebuf, 1024, "/tmp/%s", passname);
140  int execute = c5_classify_main(namebuf);
141 
142  // Delete temporary files
143  /*snprintf (namebuf, 1024, "/tmp/%s.cases", passname);
144  remove (namebuf);
145  snprintf (namebuf, 1024, "/tmp/%s.names", passname);
146  remove (namebuf);
147  snprintf (namebuf, 1024, "/tmp/%s.tree", passname);
148  remove (namebuf);
149  */
150 
151  return (execute == 1) ? FORCE_EXECUTE : FORCE_NOEXECUTE;
152 }
153 
154 // FIXME: Add random file name prefix
156 {
157  // We cannot learn if we don't have a database backend
158  if (!db)
159  return;
160 
161  std::vector<result> results = db->get_all_results();
162 
163  // If we have no results, then there is nothing to learn.
164  if (results.size() == 0)
165  return;
166 
167  // Calculate number of features from length of first result's feature vector
168  unsigned long featurecount = results[0].featlist.size();
169 
170  /* Brief note to myself:
171  Write out the column file for the pass, using "torun" as the variable to
172  find
173  */
174  for (unsigned long i = 0, size = known_passes.size(); i < size; i++)
175  {
176  // namebuf will be used as a buffer to generate file names
177  char namebuf[1024];
178  std::string passnamestr = known_passes[i]->name();
179  const char *passname = passnamestr.c_str();
180  std::cerr << "Training for " << passname << std::endl;
181 
182  // Output names file (columns for classifier)
183  snprintf (namebuf, 1024, "/tmp/%s.names", passname);
184  std::ofstream namefile(namebuf);
185  namefile << "runpass." << std::endl;
186  for (unsigned long j = 0; j < featurecount; j++)
187  namefile << results[0].featlist[j]->name()
188  << ": continuous." << std::endl;
189  namefile << "runpass: t,f" << std::endl;;
190  namefile.close();
191 
192  // Output test data (.data file)
193  snprintf (namebuf, 1024, "/tmp/%s.data", passname);
194  std::ofstream testfile(namebuf);
195  for (unsigned long j = 0, jsize = results.size(); j < jsize; j++)
196  {
197  for (unsigned long k = 0; k < featurecount; k++)
198  testfile << results[j].featlist[k]->get_feature() << ',';
199  bool ran = false;
200  for (unsigned long k = 0, ksize = results[j].passlist.size(); k < ksize; k++)
201  if (results[j].passlist[k]->name() == passname)
202  {
203  ran = true;
204  break;
205  }
206  if (ran)
207  testfile << 't' << std::endl;
208  else
209  testfile << 'f' << std::endl;
210  }
211  testfile.close();
212 
213  // Call the machine learner
214  FILE *fpipe;
215  snprintf (namebuf, 1024, "%s/c5.0 -f /tmp/%s", LIBEXECDIR, passname);
216  if (!(fpipe = static_cast<FILE *>(popen(namebuf, "r"))))
217  std::cerr << "Error Training for " << passname << std::endl;
218  else
219  while (fgets(namebuf, 1024, fpipe))
220  ;
221  pclose(fpipe);
222 
223  // Store Machine Learnt Tree
224  snprintf (namebuf, 1024, "/tmp/%s.tree", passname);
225  std::ifstream treefile(namebuf, std::ifstream::binary);
226  if (treefile)
227  {
228  treefile.seekg (0, treefile.end);
229  long treelength = treefile.tellg();
230  treefile.seekg (0, treefile.beg);
231  char *treebuf = new char[treelength];
232  treefile.read (treebuf, treelength);
233 
234  db->store_pass_blob (passnamestr, treebuf);
235 
236  delete[] treebuf;
237  }
238 
239  // Delete temporary files (namebuf holds tree file for storing)
240  /*remove (namebuf);
241  snprintf (namebuf, 1024, "/tmp/%s.names", passname);
242  remove (namebuf);
243  snprintf (namebuf, 1024, "/tmp/%s.data", passname);
244  remove (namebuf);
245  snprintf (namebuf, 1024, "/tmp/%s.tmp", passname);
246  remove (namebuf);*/
247 
248  }
249 }
250 
251 // Initilizer for file based machine learner
252 int file_ml::init (std::string dbfilename __attribute__((unused)))
253 {
254  // Attempt to load the file pointed to by MAGEEC_EXECUTELIST or a default.
255  const char *pass_file = getenv("MAGEEC_EXECUTELIST");
256  if (pass_file == NULL)
257  pass_file = static_cast<const char *>("/tmp/mageec-executelist");
258  std::ifstream input_file(pass_file);
259  if (!input_file.is_open())
260  return 1;
261 
262  // Each line in this file holds the name of a pass we want to execute
263  // Empty lines are ignored as they do not contain a pass
264  for (std::string line; getline(input_file, line); )
265  if (line != "")
266  passlist.push_back(line);
267 
268  input_file.close();
269  return 0;
270 }
271 
272 // Initilizer for file based machine learner
273 int file_ml::init (std::string compiler_version __attribute__((unused)),
274  std::string compiler_target __attribute__((unused)))
275 {
276  return init("");
277 }
278 
279 // File based decision maker
281  std::vector<mageec_feature*> features
282  __attribute__((unused)))
283 {
284  unsigned long passcount = passlist.size();
285  std::string passname = pass->name();
286  for (unsigned long i = 0; i < passcount; i++)
287  if (passlist[i] == passname)
288  return FORCE_EXECUTE;
289  return FORCE_NOEXECUTE;
290 }