mageec  0.1.0
MAchine Guided Energy Efficient Compilation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
global.c
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Copyright 2010 Rulequest Research Pty Ltd. */
4 /* */
5 /* This file is part of C5.0 GPL Edition, a single-threaded version */
6 /* of C5.0 release 2.07. */
7 /* */
8 /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9 /* modify it under the terms of the GNU General Public License as */
10 /* published by the Free Software Foundation, either version 3 of the */
11 /* License, or (at your option) any later version. */
12 /* */
13 /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16 /* General Public License for more details. */
17 /* */
18 /* You should have received a copy of the GNU General Public License */
19 /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20 /* */
21 /* <http://www.gnu.org/licenses/>. */
22 /* */
23 /*************************************************************************/
24 
25 
26 
27 /*************************************************************************/
28 /* */
29 /* General data for C5.0 */
30 /* --------------------- */
31 /* */
32 /*************************************************************************/
33 
34 #include "defns.i"
35 
36 /*************************************************************************/
37 /* */
38 /* Parameters etc */
39 /* */
40 /*************************************************************************/
41 
42 int VERBOSITY=0, /* verbosity level (0 = none) */
43  TRIALS=1, /* number of trees to be grown (initially 1) */
44  FOLDS=10, /* cross-validation folds (initially 10) */
45  UTILITY=0; /* rule utility bands */
46 
47 Boolean SUBSET=0, /* subset tests allowed */
48  BOOST=0, /* boosting invoked */
49  PROBTHRESH=0, /* to use soft thresholds */
50  RULES=0, /* rule-based classifiers */
51  XVAL=0, /* perform cross-validation */
52  NOCOSTS=0, /* ignoring costs */
53  WINNOW=0, /* attribute winnowing */
54  GLOBAL=1; /* use global pruning for trees (the only flag in this list initialised to 1) */
55 
56 CaseCount MINITEMS=2, /* minimum cases each side of a cut (initially 2) */
57  LEAFRATIO=0; /* leaves per case for boosting */
58 
59 float CF=0.25, /* confidence limit for tree pruning (initially 0.25) */
60  SAMPLE=0.0; /* sample training proportion (initially 0.0) */
61 
62 Boolean LOCK=false; /* sample locked (initially false) */
63 
64 
65 /*************************************************************************/
66 /* */
67 /* Attributes and data */
68 /* */
69 /*************************************************************************/
70 
71 Attribute ClassAtt=0, /* attribute to use as class */
72  LabelAtt=0, /* attribute to use as case ID */
73  CWtAtt=0; /* attribute to use for case weight */
74 
75 double AvCWt; /* average case weight */
76 
77 String *ClassName=0, /* class names */
78  *AttName=0, /* attribute names */
79  **AttValName=0; /* attribute value names */
80 
81 char *IgnoredVals=0; /* values of labels and attributes marked ignore */
82 int IValsSize=0, /* size of IgnoredVals */
83  IValsOffset=0; /* index of first free char in IgnoredVals */
84 
85 int MaxAtt, /* max attribute number */
86  MaxClass, /* max class number */
87  MaxDiscrVal=3, /* max discrete values for any attribute (initially 3) */
88  MaxLabel=0, /* max characters in case label */
89  LineNo=0, /* input line number */
90  ErrMsgs=0, /* errors found */
91  AttExIn=0, /* attribute exclusions/inclusions */
92  TSBase=0; /* base day for time stamps */
93 
94 DiscrValue *MaxAttVal=0; /* number of values for each attribute */
95 
96 char *SpecialStatus=0;/* special attribute treatment */
97 
98 Definition *AttDef=0; /* definitions of implicit attributes */
99 Attribute **AttDefUses=0; /* list of attributes used by definition */
100 
101 Boolean *SomeMiss=Nil, /* attribute has missing values */
102  *SomeNA=Nil, /* attribute has N/A values */
103  Winnowed=0; /* attributes have been winnowed (a plain Boolean, not a pointer like the two above) */
104 
105 ContValue *ClassThresh=0; /* thresholded class attribute */
106 
107 CaseNo MaxCase=-1; /* max data case number (initially -1) */
108 
109 DataRec *Case=0; /* data cases */
110 
112 
113 String FileStem="undefined"; /* initially the literal "undefined"; presumably the stem of the data file names -- TODO confirm */
114 
115 /*************************************************************************/
116 /* */
117 /* Trees */
118 /* */
119 /*************************************************************************/
120 
121 Tree *Raw=0, /* unpruned trees */
122  *Pruned=0, /* pruned trees */
123  WTree=0; /* winnow tree (a single Tree, not a pointer to Tree like Raw and Pruned) */
124 
125 float Confidence, /* set by classify() */
126  SampleFrac=1, /* fraction used when sampling (initially 1) */
127  *Vote=0, /* total votes for classes */
128  *BVoteBlock=0, /* boost voting block */
129  **MCost=0, /* misclassification cost [pred][real] */
130  **NCost=0, /* normalised MCost used for rules */
131  *WeightMul=0; /* prior adjustment factor */
132 
133 CRule *MostSpec=0; /* most specific rule for each class */
134 
135 Boolean UnitWeights=1, /* all weights are 1.0 (initially 1) */
136  CostWeights=0; /* reweight cases for costs */
137 
138 int Trial, /* trial number for boosting */
139  MaxTree=0; /* max tree grown */
140 
141 ClassNo *TrialPred=0; /* predictions for each boost trial */
142 
143 double *ClassFreq=0, /* ClassFreq[c] = # cases of class c */
144  **DFreq=0; /* DFreq[a][c*x] = Freq[][] for attribute a */
145 
146 float *Gain=0, /* Gain[a] = info gain by split on att a */
147  *Info=0, /* Info[a] = max info from split on att a */
148  *EstMaxGR=0, /* EstMaxGR[a] = est max GR from split on a */
149  *ClassSum=0; /* class weights during classification */
150 
151 ContValue *Bar=0; /* Bar[a] = best threshold for continuous att a */
152 
153 double GlobalBaseInfo, /* base information before split */
154  **Bell=0; /* table of Bell numbers for subsets */
155 
156 Byte *Tested=0; /* Tested[a] = att a already tested */
157 
158 Set **Subset=0; /* Subset[a][s] = subset s for att a */
159 int *Subsets=0; /* Subsets[a] = no. subsets for att a */
160 
161 EnvRec GEnv; /* environment block */
162 
163 /*************************************************************************/
164 /* */
165 /* Rules */
166 /* */
167 /*************************************************************************/
168 
169 CRule *Rule=0; /* current rules */
170 
171 RuleNo NRules, /* number of rules */
172  RuleSpace; /* space currently allocated for rules */
173 
174 CRuleSet *RuleSet=0; /* rulesets */
175 
176 ClassNo Default; /* default class associated with ruleset or
177  boosted classifier */
178 
179 Byte **Fires=Nil, /* Fires[r][*] = cases covered by rule r */
180  *CBuffer=Nil; /* buffer for compressing lists */
181 
182 int *CovBy=Nil, /* entry numbers for Fires inverse */
183  *List=Nil; /* temporary list of cases or rules */
184 
185 float AttTestBits, /* average bits to encode tested attribute */
186  *BranchBits=0; /* likewise, bits to encode attribute value */
187 int *AttValues=0, /* number of attribute values in the data */
188  *PossibleCuts=0;/* number of thresholds for an attribute */
189 
190 double *LogCaseNo=0, /* LogCaseNo[i] = log2(i) */
191  *LogFact=0; /* LogFact[i] = log2(i!) */
192 
193 int *UtilErr=0, /* error by utility band */
194  *UtilBand=0; /* last rule in each band */
195 double *UtilCost=0; /* cost by utility band */
196 
197 
198 /*************************************************************************/
199 /* */
200 /* Misc */
201 /* */
202 /*************************************************************************/
203 
204 int KRInit=0, /* KRandom initializer for SAMPLE */
205  Now=0; /* current stage */
206 
207 FILE *TRf=0; /* file pointer for tree and rule i/o */
208 char Fn[500]; /* file name buffer (fixed size: 500 chars) */
209 
210 FILE *Of=0; /* output file */
211