mageec  0.1.0
MAchine Guided Energy Efficient Compilation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
classify-sample.c
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Source code for use with See5/C5.0 Release 2.10 */
4 /* ----------------------------------------------- */
5 /* Copyright RuleQuest Research 2013 */
6 /* */
7 /* This code is provided "as is" without warranty of any kind, */
8 /* either express or implied. All use is at your own risk. */
9 /* */
10 /*************************************************************************/
11 
12 
13 /*************************************************************************/
14 /* */
15 /* Sample program to demonstrate the use of See5/C5.0 classifiers */
16 /* -------------------------------------------------------------- */
17 /* */
18 /* Compilation: */
19 /* */
20 /* Unix: use an ANSI C compiler such as gcc and include */
21 /* the math library, e.g. gcc sample.c -lm */
22 /* */
23 /* Windows: compile as a console application with symbol */
24 /* "WIN32" defined */
25 /* */
26 /* This program accepts four command-line options: */
27 /* */
28 /* -f <filestem> specify the application name (required) */
29 /* -r use rulesets instead of decision trees */
30 /* -R use rulesets and show rules used */
31 /* -x use a similar format as saved by the */
32 /* See5 cross-reference window */
33 /* */
34 /* The program expects to find the following files: */
35 /* */
36 /* <filestem>.names (the application names file) */
37 /* */
38 /* <filestem>.rules or <filestem>.tree (the classifier files */
39 /* generated by C5.0 or See5) */
40 /* */
41 /* <filestem>.costs (optional -- the costs file) */
42 /* */
43 /* <filestem>.cases (with a format similar to a .data file, but */
44 /* allowing classes to be given as '?' meaning 'unknown') */
45 /* */
46 /* Please note: the names file must be exactly as it was when */
47 /* the classifier was generated. */
48 /* */
49 /* For each case in <filestem>.cases, the program prints the */
50 /* given class and then the class predicted by the classifier */
51 /* together with the confidence of the prediction. */
52 /* */
53 /* Revised March 2013 */
54 /* */
55 /*************************************************************************/
56 
57 #include "C5/classify-defns.h"
58 #include "C5/classify-global.c"
59 #include "C5/classify-hooks.c"
60 
61 
62 
63 /*************************************************************************/
64 /* */
65 /* Main */
66 /* */
67 /*************************************************************************/
68 
69 
70 int c5_classify_main(char *filestem)
71 /* ---- */
72 {
73  FILE *F;
74  DataRec Case;
75  int CaseNo=0, MaxClassLen=5, o, TotalRules=0,
76  StartList, CurrentPosition;
77  ClassNo Predict, c;
78  Boolean XRefForm=false;
79  extern String OptArg, Option;
80  void ShowRules(int);
81 
82  /* Variable for holding whether we want to run a pass */
83  int runpass = 0;
84 
85  /* Process options */
86  FileStem = filestem;
87 
88  /* Read information on attribute names, values, and classes */
89 
90  if ( ! (F = GetFile(".names", "r")) ) Error(NOFILE, Fn, "");
91 
92  GetNames(F);
93 
94  /* Set up the classification environment */
95 
96  GCEnv = AllocZero(1, CEnvRec);
97 
98  GCEnv->ClassWt = Alloc(MaxClass+1, double);
99  GCEnv->Vote = Alloc(MaxClass+1, float);
100 
101  /* Read the appropriate classifier file. Call CheckFile() to
102  determine the number of trials, then allocate space for
103  trees or rulesets */
104 
105  if ( RULES )
106  {
107  CheckFile(".rules", false);
109 
110  ForEach(Trial, 0, TRIALS-1)
111  {
112  RuleSet[Trial] = GetRules(".rules");
113  TotalRules += RuleSet[Trial]->SNRules;
114  }
115 
116  if ( RULESUSED )
117  {
118  GCEnv->RulesUsed = Alloc(TotalRules + TRIALS, RuleNo);
119  }
120 
122  }
123  else
124  {
125  CheckFile(".tree", false);
126  Pruned = AllocZero(TRIALS+1, Tree);
127 
128  ForEach(Trial, 0, TRIALS-1)
129  {
130  Pruned[Trial] = GetTree(".tree");
131  }
132  }
133 
134  /* Close the classifier file and reset the file variable */
135 
136  fclose(TRf);
137  TRf = 0;
138 
139  /* Set global default class for boosting */
140 
141  Default = ( RULES ? RuleSet[0]->SDefault : Pruned[0]->Leaf );
142 
143  /* Now classify the cases in file <filestem>.cases.
144  This has the same format as a .data file except that
145  the class can be "?" to indicate that it is unknown. */
146 
147  if ( XRefForm )
148  {
149  ForEach(c, 1, MaxClass)
150  {
151  if ( (o = strlen(ClassName[c])) > MaxClassLen ) MaxClassLen = o;
152  }
153 
154  /*printf("%-15s %*s [Predicted]%s\n\n",
155  "Case", -MaxClassLen, "Class",
156  ( RULESUSED ? " Rules" : "" ));*/
157 
158  StartList = 16 + MaxClassLen + 3 +
159  ( MaxClassLen > 9 ? MaxClassLen + 2 : 11 ) + 3;
160  }
161  else
162  {
163  /*printf("Case\t\tGiven\t\tPredicted%s\n %s\t\tClass\t\tClass\n\n",
164  ( RULESUSED ? "\t\t Rules" : "" ),
165  ( LabelAtt ? "ID" : "No" ));*/
166 
167  StartList = 60;
168  }
169 
170  if ( ! (F = GetFile(".cases", "r")) ) Error(NOFILE, Fn, "");
171 
172  LineNo = 0;
173 
174  while ( (Case = GetDataRec(F, false)) )
175  {
176  /* For this case, find the class predicted by See5/C5.0 model */
177 
178  Predict = Classify(Case, GCEnv);
179 
180  /* Print either case label or number */
181 
182  if ( LabelAtt )
183  {
184  /*printf("%-15.15s ", (String) (IgnoredVals + SVal(Case,LabelAtt)));*/
185  }
186  else
187  {
188  /*printf("%4d\t\t", ++CaseNo);*/
189  }
190 
191  /* Print the result for this case in alternative formats */
192 
193  if ( XRefForm )
194  {
195  /*printf("%*s", -MaxClassLen, ClassName[Class(Case)]);
196  CurrentPosition = 16 + MaxClassLen;
197 
198  if ( Class(Case) != Predict )
199  {
200  printf(" [%s]", ClassName[Predict]);
201  CurrentPosition += 5 + strlen(ClassName[Predict]);
202  }*/
203  }
204  else
205  {
206  /*&printf("%-15.15s %-15.15s [%.2f]",
207  ClassName[Class(Case)],
208  ClassName[Predict], GCEnv->Confidence);*/
209  CurrentPosition = 54;
210  }
211 
212  /* Return the value that MAGEEC Basic Pass Selector wants.
213  This is whether we classify as "t". */
214  if (!strcmp(ClassName[Predict], "t"))
215  runpass = 1;
216 
217  if ( RULESUSED ) ShowRules(StartList - CurrentPosition);
218 
219  /*printf("\n");*/
220 
221  /* Free the memory used by this case */
222 
223  FreeLastCase(Case);
224  }
225 
226  /* Close the case file and free allocated memory */
227 
228  fclose(F);
229  FreeGlobals();
230 
231  return runpass;
232 }
233 
234 
235 
236 /*************************************************************************/
237 /* */
238 /* Show rules that were used to classify a case. */
239 /* Classify() will have set GCEnv->RulesUsed[] to */
240 /* number of active rules for trial 0, */
241 /* first active rule, second active rule, ..., last active rule, */
242 /* number of active rules for trial 1, */
243 /* first active rule, second active rule, ..., last active rule, */
244 /* and so on. */
245 /* */
246 /*************************************************************************/
247 
248 
249 void ShowRules(int Spaces)
250 /* --------- */
251 {
252  int p, pLast, a, b, First;
253 
254  printf("%*s", Spaces, "");
255 
256  p = 0;
257  ForEach(Trial, 0, TRIALS-1)
258  {
259  pLast = p + GCEnv->RulesUsed[p];
260 
261  ForEach(a, 1, GCEnv->RulesUsed[p])
262  {
263  /* Rules used are not in order, so find first */
264 
265  First = 0;
266 
267  ForEach(b, p+1, pLast)
268  {
269  if ( GCEnv->RulesUsed[b] &&
270  ( ! First ||
271  GCEnv->RulesUsed[b] < GCEnv->RulesUsed[First] ) )
272  {
273  First = b;
274  }
275  }
276 
277  if ( TRIALS > 1 ) printf("%d/", Trial);
278 
279  printf("%d ", GCEnv->RulesUsed[First]);
280 
281  GCEnv->RulesUsed[First] = 0;
282  }
283 
284  p = pLast + 1;
285  }
286 }