mageec  0.1.0
MAchine Guided Energy Efficient Compilation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
c50.c
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Copyright 2010 Rulequest Research Pty Ltd. */
4 /* */
5 /* This file is part of C5.0 GPL Edition, a single-threaded version */
6 /* of C5.0 release 2.07. */
7 /* */
8 /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9 /* modify it under the terms of the GNU General Public License as */
10 /* published by the Free Software Foundation, either version 3 of the */
11 /* License, or (at your option) any later version. */
12 /* */
13 /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16 /* General Public License for more details. */
17 /* */
18 /* You should have received a copy of the GNU General Public License */
19 /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20 /* */
21 /* <http://www.gnu.org/licenses/>. */
22 /* */
23 /*************************************************************************/
24 
25 
26 
27 /*************************************************************************/
28 /* */
29 /* Main routine, C5.0 */
30 /* ------------------ */
31 /* */
32 /*************************************************************************/
33 
34 
35 #include "defns.i"
36 #include "extern.i"
37 #include <signal.h>
38 
39 #include <sys/unistd.h>
40 #include <sys/time.h>
41 #include <sys/resource.h>
42 
/*  SetFOpt(V): parse the current option argument OptArg as a floating-   */
/*  point value with strtod() and store it in V.  If any characters are   */
/*  left unparsed, `break` out of the enclosing switch without touching   */
/*  ArgOK; otherwise set ArgOK to true.                                    */
/*                                                                         */
/*  The expansion is several statements and ends without a semicolon, so  */
/*  it must be used as a complete statement directly inside a switch case */
/*  where OptArg, EndPtr and ArgOK are in scope.  '\00' is an octal       */
/*  escape for the NUL character.                                          */
/*                                                                         */
/*  NOTE(review): an empty OptArg leaves EndPtr pointing at the NUL, so   */
/*  it passes this test and yields 0; range errors are not checked.       */
43 #define SetFOpt(V) V = strtod(OptArg, &EndPtr);\
44  if ( ! EndPtr || *EndPtr != '\00' ) break;\
45  ArgOK = true
/*  SetIOpt(V): same contract as SetFOpt, but the argument is parsed as a */
/*  base-10 integer with strtol().                                         */
/*                                                                         */
/*  NOTE(review): strtol() returns a long; whether V can hold every       */
/*  result depends on the type of the variable passed in -- not checked   */
/*  here.                                                                  */
46 #define SetIOpt(V) V = strtol(OptArg, &EndPtr, 10);\
47  if ( ! EndPtr || *EndPtr != '\00' ) break;\
48  ArgOK = true
49 
50 
/*************************************************************************/
/*                                                                       */
/*  Program entry point.  In order, the visible code:                    */
/*    - raises the soft stack limit,                                     */
/*    - redirects output to a file when the last two arguments are       */
/*      "-o <file>",                                                     */
/*    - processes the command-line options,                              */
/*    - reads the .names and .data files (plus optional .test and       */
/*      .costs files),                                                   */
/*    - optionally winnows attributes,                                   */
/*    - then either runs cross-validation or evaluates on the training   */
/*      cases and, if a test file can be opened, on the test cases.      */
/*                                                                       */
/*  Returns 0 on normal completion.  Invalid or 'h' options end in       */
/*  Goodbye(1); what Goodbye and Error do is not visible in this file.   */
/*                                                                       */
/*  NOTE(review): this copy is a numbered listing from which some lines  */
/*  are absent (listing numbers 220-221, 276, 279 and 295 do not         */
/*  appear), so a few steps below look empty.  Restore them from the     */
/*  real source before building.                                         */
/*                                                                       */
/*************************************************************************/
51 int main(int Argc, char *Argv[])
52 /* ---- */
53 {
54  int o; /* option letter returned by ProcessOption */
55  extern String OptArg, Option;
56  char *EndPtr; /* end-of-parse pointer used by SetIOpt/SetFOpt */
57  Boolean FirstTime=true, ArgOK;
58  double StartTime;
59  FILE *F;
60  CaseNo SaveMaxCase;
61  Attribute Att;
62 
63  struct rlimit RL;
64 
65  /* Make sure there is a largish runtime stack */
    /* The soft limit is raised to at least 20 * 1024 * 1024 bytes and   */
    /* then capped at the hard limit when that is greater than zero.     */
    /* NOTE(review): getrlimit/setrlimit results are not checked.        */
66 
67  getrlimit(RLIMIT_STACK, &RL);
68 
69  RL.rlim_cur = Max(RL.rlim_cur, 20 * 1024 * 1024);
70 
71  if ( RL.rlim_max > 0 ) /* -1 if unlimited */
72  {
73  RL.rlim_cur = Min(RL.rlim_max, RL.rlim_cur);
74  }
75 
76  setrlimit(RLIMIT_STACK, &RL);
77 
78 
79  /* Check for output to be saved to a file */
    /* Only a trailing "-o <file>" pair is recognised; it is removed     */
    /* from Argc so the option loop never sees it.  If fopen() fails Of  */
    /* is null and the fallback below sends output to stdout instead.    */
80 
81  if ( Argc > 2 && ! strcmp(Argv[Argc-2], "-o") )
82  {
83  Of = fopen(Argv[Argc-1], "w");
84  Argc -= 2;
85  }
86 
87  if ( ! Of )
88  {
89  Of = stdout;
90  }
91 
    /* Default KRInit: low 12 bits (octal 07777) of the current time;    */
    /* option 'I' below can override it.                                 */
92  KRInit = time(0) & 07777;
93 
94  PrintHeader("");
95 
96  /* Process options */
    /* ProcessOption yields one option letter per call and zero when     */
    /* done.  Presumably a '+' after a letter in the option string marks */
    /* an option that takes an argument -- TODO confirm against          */
    /* ProcessOption.  Each case must leave ArgOK true; any option that  */
    /* does not (bad argument, unknown letter, or 'h', which has no      */
    /* case) falls into the usage/exit path after the switch.            */
97 
98  while ( (o = ProcessOption(Argc, Argv, "f+bpv+t+sm+c+S+I+ru+egX+wh")) )
99  {
100  if ( FirstTime )
101  {
102  fprintf(Of, T_OptHeader);
103  FirstTime = false;
104  }
105 
106  ArgOK = false;
107 
108  switch (o)
109  {
110  case 'f': FileStem = OptArg;
111  fprintf(Of, T_OptApplication, FileStem);
112  ArgOK = true;
113  break;
114  case 'b': BOOST = true;
115  fprintf(Of, T_OptBoost);
116  if ( TRIALS == 1 ) TRIALS = 10; /* only when TRIALS is still 1 */
117  ArgOK = true;
118  break;
119  case 'p': PROBTHRESH = true;
120  fprintf(Of, T_OptProbThresh);
121  ArgOK = true;
122  break;
    /* 'v' is handled only when VerbOpt is defined; otherwise it has no  */
    /* case and is reported as unrecognised.                             */
123 #ifdef VerbOpt
124  case 'v': SetIOpt(VERBOSITY);
125  fprintf(Of, "\tVerbosity level %d\n", VERBOSITY);
126  ArgOK = true;
127  break;
128 #endif
    /* In the numeric cases below SetIOpt/SetFOpt break out early on a   */
    /* malformed argument; Check is defined elsewhere and presumably     */
    /* enforces the given range -- TODO confirm.                         */
129  case 't': SetIOpt(TRIALS);
130  fprintf(Of, T_OptTrials, TRIALS);
131  Check(TRIALS, 3, 1000);
132  BOOST = true; /* giving a trial count also turns boosting on */
133  break;
134  case 's': SUBSET = true;
135  fprintf(Of, T_OptSubsets);
136  ArgOK = true;
137  break;
138  case 'm': SetFOpt(MINITEMS);
139  fprintf(Of, T_OptMinCases, MINITEMS);
140  Check(MINITEMS, 1, 1000000);
141  break;
142  case 'c': SetFOpt(CF);
143  fprintf(Of, T_OptCF, CF);
144  Check(CF, 0, 100);
145  CF /= 100; /* entered on a 0-100 scale, stored as a fraction */
146  break;
147  case 'r': RULES = true;
148  fprintf(Of, T_OptRules);
149  ArgOK = true;
150  break;
151  case 'S': SetFOpt(SAMPLE);
152  fprintf(Of, T_OptSampling, SAMPLE);
153  Check(SAMPLE, 0.1, 99.9);
154  SAMPLE /= 100; /* entered on a 0-100 scale, stored as a fraction */
155  break;
156  case 'I': SetIOpt(KRInit);
157  fprintf(Of, T_OptSeed, KRInit);
158  KRInit = KRInit & 07777; /* value is printed first, then masked to 12 bits */
159  break;
160  case 'u': SetIOpt(UTILITY);
161  fprintf(Of, T_OptUtility, UTILITY);
162  Check(UTILITY, 2, 10000);
163  RULES = true; /* this option also turns RULES on */
164  break;
165  case 'e': NOCOSTS = true;
166  fprintf(Of, T_OptNoCosts);
167  ArgOK = true;
168  break;
169  case 'w': WINNOW = true;
170  fprintf(Of, T_OptWinnow);
171  ArgOK = true;
172  break;
173  case 'g': GLOBAL = false;
174  fprintf(Of, T_OptNoGlobal);
175  ArgOK = true;
176  break;
177  case 'X': SetIOpt(FOLDS);
178  fprintf(Of, T_OptXval, FOLDS);
179  Check(FOLDS, 2, 1000);
180  XVAL = true;
181  break;
182  }
183 
    /* Anything not accepted above: print the unrecognised-option        */
    /* message (skipped for 'h'), then the option list, then exit via    */
    /* Goodbye(1).  The argument text is shown only when OptArg is       */
    /* non-null and is not Option+2.                                     */
184  if ( ! ArgOK )
185  {
186  if ( o != 'h' )
187  {
188  fprintf(Of, T_UnregnizedOpt,
189  Option,
190  ( ! OptArg || OptArg == Option+2 ? "" : OptArg ));
191  fprintf(Of, T_SummaryOpts);
192  }
193  fprintf(Of, T_ListOpts);
194  Goodbye(1);
195  }
196  }
197 
    /* Warn when both UTILITY and BOOST are set. */
198  if ( UTILITY && BOOST )
199  {
200  fprintf(Of, T_UBWarn);
201  }
202 
203  StartTime = ExecTime();
204 
205  /* Get information on training data */
206 
207  if ( ! (F = GetFile(".names", "r")) ) Error(NOFILE, "", "");
208  GetNames(F);
209 
210  if ( ClassAtt )
211  {
212  fprintf(Of, T_ClassVar, AttName[ClassAtt]);
213  }
214 
215  NotifyStage(READDATA);
216  Progress(-1.0);
217 
218  /* Allocate space for SomeMiss[] and SomeNA[] */
    /* NOTE(review): the statements this comment describes (listing      */
    /* lines 220-221) are missing from this copy.                        */
219 
222 
223  /* Read data file */
224 
225  if ( ! (F = GetFile(".data", "r")) ) Error(NOFILE, "", "");
226  GetData(F, true, false);
227  fprintf(Of, TX_ReadData(MaxCase+1, MaxAtt, FileStem));
228 
    /* For cross-validation a .test file, if one can be opened, is read  */
    /* as well; the message reports MaxCase-SaveMaxCase, so presumably   */
    /* these cases are added after the ones already read -- TODO confirm */
    /* against GetData.                                                  */
229  if ( XVAL && (F = GetFile(".test", "r")) )
230  {
231  SaveMaxCase = MaxCase;
232  GetData(F, false, false);
233  fprintf(Of, TX_ReadTest(MaxCase-SaveMaxCase, FileStem));
234  }
235 
236  /* Check whether case weight attribute appears */
237 
238  if ( CWtAtt )
239  {
240  fprintf(Of, T_CWtAtt);
241  }
242 
    /* Read the .costs file unless NOCOSTS is set; the message is        */
    /* printed only if MCost is non-null after GetMCosts.                */
243  if ( ! NOCOSTS && (F = GetFile(".costs", "r")) )
244  {
245  GetMCosts(F);
246  if ( MCost )
247  {
248  fprintf(Of, T_ReadCosts, FileStem);
249  }
250  }
251 
252  /* Note any attribute exclusions/inclusions */
    /* AttExIn == -1 selects the T_AttributesOut heading and lists the   */
    /* attributes whose SKIP bit is set; any other non-zero value        */
    /* selects T_AttributesIn and lists those whose SKIP bit is clear.   */
    /* ClassAtt and CWtAtt are never listed.                             */
253 
254  if ( AttExIn )
255  {
256  fprintf(Of, "%s", ( AttExIn == -1 ? T_AttributesOut : T_AttributesIn ));
257 
258  ForEach(Att, 1, MaxAtt)
259  {
260  if ( Att != ClassAtt &&
261  Att != CWtAtt &&
262  ( StatBit(Att, SKIP) > 0 ) == ( AttExIn == -1 ) )
263  {
264  fprintf(Of, " %s\n", AttName[Att]);
265  }
266  }
267  }
268 
269  /* Build decision trees */
270 
    /* Without boosting exactly one trial is run. */
271  if ( ! BOOST )
272  {
273  TRIALS = 1;
274  }
275 
    /* NOTE(review): listing lines 276 and 279 are missing from this     */
    /* copy, so the body of the RULES test below appears empty.          */
277  if ( RULES )
278  {
280  }
281 
282  if ( WINNOW )
283  {
284  NotifyStage(WINNOWATTS);
285  Progress(-MaxAtt);
286  WinnowAtts();
287  }
288 
289  if ( XVAL )
290  {
291  CrossVal();
292  }
293  else
294  {
    /* NOTE(review): listing line 295, the first statement of this       */
    /* branch, is missing from this copy.                                */
296 
297  /* Evaluation */
298 
299  fprintf(Of, T_EvalTrain, MaxCase+1);
300 
301  NotifyStage(EVALTRAIN);
302  Progress(-TRIALS * (MaxCase+1.0));
303 
304  Evaluate(CMINFO | USAGEINFO);
305 
    /* The test cases come from ".data" when SAMPLE is non-zero and from */
    /* ".test" otherwise.  The current data is freed before they are     */
    /* read, and this step is skipped if the file cannot be opened.      */
306  if ( (F = GetFile(( SAMPLE ? ".data" : ".test" ), "r")) )
307  {
308  NotifyStage(READTEST);
309  fprintf(Of, "\n");
310 
311  FreeData();
312  GetData(F, false, false);
313 
314  fprintf(Of, T_EvalTest, MaxCase+1);
315 
316  NotifyStage(EVALTEST);
317  Progress(-TRIALS * (MaxCase+1.0));
318 
319  Evaluate(CMINFO);
320  }
321  }
322 
    /* Report the difference between ExecTime() now and at StartTime. */
323  fprintf(Of, T_Time, ExecTime() - StartTime);
324 
    /* Cleanup() is called only when VerbOpt is defined. */
325 #ifdef VerbOpt
326  Cleanup();
327 #endif
328 
329  return 0;
330 }