mageec
0.1.0
MAchine Guided Energy Efficient Compilation
Main Page
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
ml
C5
attwinnow.c
Go to the documentation of this file.
1
/*************************************************************************/
2
/* */
3
/* Copyright 2010 Rulequest Research Pty Ltd. */
4
/* */
5
/* This file is part of C5.0 GPL Edition, a single-threaded version */
6
/* of C5.0 release 2.07. */
7
/* */
8
/* C5.0 GPL Edition is free software: you can redistribute it and/or */
9
/* modify it under the terms of the GNU General Public License as */
10
/* published by the Free Software Foundation, either version 3 of the */
11
/* License, or (at your option) any later version. */
12
/* */
13
/* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16
/* General Public License for more details. */
17
/* */
18
/* You should have received a copy of the GNU General Public License */
19
/* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20
/* */
21
/* <http://www.gnu.org/licenses/>. */
22
/* */
23
/*************************************************************************/
24
25
26
27
/*************************************************************************/
28
/* */
29
/* Routines for winnowing attributes */
30
/* --------------------------------- */
31
/* */
32
/*************************************************************************/
33
34
35
#include "defns.i"
36
#include "extern.i"
37
38
float
*
AttImp
=
Nil
;
/* att importance */
39
Boolean
*
Split
=
Nil
,
/* atts used in unpruned tree */
40
*
Used
=
Nil
;
/* atts used in pruned tree */
41
42
43
/*************************************************************************/
44
/* */
45
/* Winnow attributes by constructing a tree from half the data. */
46
/* Remove those that are never used as splits and those that */
47
/* increase error on the remaining data, and check that the new */
48
/* error cost does not increase */
49
/* */
50
/*************************************************************************/
51
52
53
void
WinnowAtts
()
54
/* ---------- */
55
{
56
Attribute
Att, Removed=0, Best;
57
CaseNo
i, Bp, Ep;
58
float
Base;
59
Boolean
First=
true
, *Upper;
60
ClassNo
c;
61
extern
Attribute
*
DList
;
62
extern
int
NDList
;
63
64
/* Save original case order */
65
66
SaveCase
=
Alloc
(
MaxCase
+1,
DataRec
);
67
ForEach
(i, 0,
MaxCase
)
68
{
69
SaveCase
[i] =
Case
[i];
70
}
71
72
/* Split data into two halves with equal class frequencies */
73
74
Upper =
AllocZero
(
MaxClass
+1,
Boolean
);
75
76
Bp = 0;
77
Ep =
MaxCase
;
78
ForEach
(i, 0,
MaxCase
)
79
{
80
c =
Class
(
SaveCase
[i]);
81
82
if
( Upper[c] )
83
{
84
Case
[Ep--] =
SaveCase
[i];
85
}
86
else
87
{
88
Case
[Bp++] =
SaveCase
[i];
89
}
90
91
Upper[c] = ! Upper[c];
92
}
93
94
Free
(Upper);
95
96
/* Use first 50% of the cases for building a winnowing tree
97
and remaining 50% for measuring attribute importance */
98
99
AttImp
=
AllocZero
(
MaxAtt
+1,
float
);
100
Split
=
AllocZero
(
MaxAtt
+1,
Boolean
);
101
Used
=
AllocZero
(
MaxAtt
+1,
Boolean
);
102
103
Base =
TrialTreeCost
(
true
);
104
105
/* Remove attributes when doing so would reduce error cost */
106
107
ForEach
(Att, 1,
MaxAtt
)
108
{
109
if
(
AttImp
[Att] < 0 )
110
{
111
SpecialStatus
[Att] ^=
SKIP
;
112
Removed++;
113
}
114
}
115
116
/* If any removed, rebuild tree and reinstate if error increases */
117
118
if
( Removed &&
TrialTreeCost
(
false
) > Base )
119
{
120
ForEach
(Att, 1,
MaxAtt
)
121
{
122
if
(
AttImp
[Att] < 0 )
123
{
124
AttImp
[Att] = 1;
125
SpecialStatus
[Att] ^=
SKIP
;
126
Verbosity(1, fprintf(
Of
,
" re-including %s\n"
,
AttName
[Att]))
127
}
128
}
129
130
Removed=0;
131
}
132
133
/* Discard unused attributes */
134
135
ForEach
(Att, 1,
MaxAtt
)
136
{
137
if
( Att !=
ClassAtt
&& !
Skip
(Att) && !
Split
[Att] )
138
{
139
SpecialStatus
[Att] ^=
SKIP
;
140
Removed++;
141
}
142
}
143
144
/* Print summary of winnowing */
145
146
if
( ! Removed )
147
{
148
fprintf(
Of
, T_NoWinnow);
149
}
150
else
151
{
152
fprintf(
Of
, T_AttributesWinnowed, Removed, Plural(Removed));
153
154
/* Print remaining attributes ordered by importance */
155
156
while
(
true
)
157
{
158
Best = 0;
159
ForEach
(Att, 1,
MaxAtt
)
160
{
161
if
(
AttImp
[Att] >= 1 &&
162
( ! Best ||
AttImp
[Att] >
AttImp
[Best] ) )
163
{
164
Best = Att;
165
}
166
}
167
if
( ! Best )
break
;
168
169
if
( First )
170
{
171
fprintf(
Of
, T_EstImportance);
172
First =
false
;
173
}
174
if
(
AttImp
[Best] >= 1.005 )
175
{
176
fprintf(
Of
,
"%7d%% %s\n"
,
177
(
int
) ((
AttImp
[Best] - 1) * 100 + 0.5),
178
AttName
[Best]);
179
}
180
else
181
{
182
fprintf(
Of
,
" <1%% %s\n"
,
AttName
[Best]);
183
}
184
AttImp
[Best] = 0;
185
}
186
}
187
188
if
( Removed )
189
{
190
Winnowed
=
true
;
191
192
/* Reset DList */
193
194
NDList = 0;
195
ForEach
(Att, 1,
MaxAtt
)
196
{
197
if
(
DFreq
[Att] && !
Skip
(Att) )
198
{
199
DList[NDList++] = Att;
200
}
201
}
202
}
203
204
/* Restore case order and clean up */
205
206
ForEach
(i, 0,
MaxCase
)
207
{
208
Case
[i] =
SaveCase
[i];
209
}
210
211
FreeUnlessNil
(
SaveCase
);
SaveCase
=
Nil
;
212
FreeUnlessNil
(
AttImp
);
AttImp
=
Nil
;
213
FreeUnlessNil
(
Split
);
Split
=
Nil
;
214
FreeUnlessNil
(
Used
);
Used
=
Nil
;
215
216
Now
= 0;
217
}
218
219
220
221
/*************************************************************************/
222
/* */
223
/* Build trial tree and check error cost on remaining data. */
224
/* If first time, note split attributes and check effect of */
225
/* removing every attribute */
226
/* */
227
/*************************************************************************/
228
229
230
float
TrialTreeCost
(
Boolean
FirstTime)
231
/* ------------- */
232
{
233
Attribute
Att;
234
float
Base, Cost, SaveMINITEMS;
235
CaseNo
SaveMaxCase, Cut;
236
int
SaveVERBOSITY;
237
238
Verbosity(1,
239
fprintf(
Of
, ( FirstTime ?
"\nWinnow cycle:\n"
:
"\nCheck:\n"
)))
240
241
/* Build and prune trial tree */
242
243
SaveMaxCase =
MaxCase
;
244
SaveVERBOSITY =
VERBOSITY
;
245
SaveMINITEMS =
MINITEMS
;
246
MINITEMS
=
Max
(
MINITEMS
/ 2, 2.0);
247
248
Cut = (
MaxCase
+1) / 2 - 1;
249
250
InitialiseWeights
();
251
LEAFRATIO
= 0;
252
VERBOSITY
= 0;
253
MaxCase
= Cut;
254
255
memset(
Tested
, 0,
MaxAtt
+1);
/* reset tested attributes */
256
257
SetMinGainThresh
();
258
FormTree
(0, Cut, 0, &
WTree
);
259
260
if
( FirstTime )
261
{
262
/* Find attributes used in unpruned tree */
263
264
ScanTree
(
WTree
,
Split
);
265
}
266
267
Prune
(
WTree
);
268
269
VERBOSITY
= SaveVERBOSITY;
270
MaxCase
= SaveMaxCase;
271
MINITEMS
= SaveMINITEMS;
272
273
Verbosity(2,
274
PrintTree
(
WTree
,
"Winnowing tree:"
);
275
fprintf(
Of
,
"\n training error cost %g\n"
,
ErrCost
(
WTree
, 0, Cut)))
276
277
Base =
ErrCost
(
WTree
, Cut+1,
MaxCase
);
278
279
Verbosity(1,
280
fprintf(
Of
,
" initial error cost %g\n"
, Base))
281
282
if
( FirstTime )
283
{
284
/* Check each attribute used in pruned tree */
285
286
ScanTree
(
WTree
,
Used
);
287
288
ForEach
(Att, 1,
MaxAtt
)
289
{
290
291
if
( !
Used
[Att] )
292
{
293
Verbosity(1,
294
if
( Att !=
ClassAtt
&& !
Skip
(Att) )
295
{
296
fprintf(
Of
,
" %s not used\n"
,
AttName
[Att]);
297
})
298
299
if
(
Split
[Att] )
300
{
301
AttImp
[Att] = 1;
302
}
303
304
continue
;
305
}
306
307
/* Determine error cost if this attribute omitted */
308
309
SpecialStatus
[Att] ^=
SKIP
;
310
311
Cost =
ErrCost
(
WTree
, Cut+1,
MaxCase
);
312
313
AttImp
[Att] = ( Cost < Base ? -1 : Cost / Base );
314
Verbosity(1,
315
fprintf(
Of
,
" error cost without %s = %g%s\n"
,
316
AttName
[Att], Cost,
317
( Cost < Base ?
" - excluded"
:
""
)))
318
319
SpecialStatus
[Att] ^=
SKIP
;
320
}
321
}
322
323
if
(
WTree
)
324
{
325
FreeTree
(
WTree
);
WTree
=
Nil
;
326
}
327
328
return
Base;
329
}
330
331
332
333
/*************************************************************************/
334
/* */
335
/* Determine the error rate or cost of T on cases Fp through Lp */
336
/* */
337
/*************************************************************************/
338
339
340
float
ErrCost
(
Tree
T,
CaseNo
Fp,
CaseNo
Lp)
341
/* ------- */
342
{
343
CaseNo
i;
344
float
ErrCost
=0;
345
ClassNo
Pred;
346
347
if
(
MCost
)
348
{
349
ForEach
(i, Fp, Lp)
350
{
351
if
( (Pred =
TreeClassify
(
Case
[i], T)) !=
Class
(
Case
[i]) )
352
{
353
ErrCost +=
MCost
[Pred][
Class
(
Case
[i])];
354
}
355
}
356
}
357
else
358
{
359
ForEach
(i, Fp, Lp)
360
{
361
if
(
TreeClassify
(
Case
[i], T) !=
Class
(
Case
[i]) )
362
{
363
ErrCost += 1.0;
364
}
365
}
366
}
367
368
return
ErrCost
;
369
}
370
371
372
373
/*************************************************************************/
374
/* */
375
/* Find attributes used in tree T */
376
/* */
377
/*************************************************************************/
378
379
380
void
ScanTree
(
Tree
T,
Boolean
*
Used
)
381
/* -------- */
382
{
383
DiscrValue
v;
384
385
if
( T->
NodeType
)
386
{
387
Used[T->
Tested
] =
true
;
388
389
ForEach
(v, 1, T->
Forks
)
390
{
391
ScanTree
(T->
Branch
[v], Used);
392
}
393
}
394
}
Generated on Wed Aug 6 2014 16:45:14 for mageec by
1.8.4