mageec  0.1.0
MAchine Guided Energy Efficient Compilation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
trees.c
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Copyright 2010 Rulequest Research Pty Ltd. */
4 /* */
5 /* This file is part of C5.0 GPL Edition, a single-threaded version */
6 /* of C5.0 release 2.07. */
7 /* */
8 /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9 /* modify it under the terms of the GNU General Public License as */
10 /* published by the Free Software Foundation, either version 3 of the */
11 /* License, or (at your option) any later version. */
12 /* */
13 /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16 /* General Public License for more details. */
17 /* */
18 /* You should have received a copy of the GNU General Public License */
19 /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20 /* */
21 /* <http://www.gnu.org/licenses/>. */
22 /* */
23 /*************************************************************************/
24 
25 
26 
27 /*************************************************************************/
28 /* */
29 /* Routines for displaying, building, saving and restoring trees */
30 /* ------------------------------------------------------------- */
31 /* */
32 /*************************************************************************/
33 
34 
35 #include "defns.i"
36 #include "extern.i"
37 
38 
39 #define TabSize 4
40 #define Utility ClassDist[0]
41 #define Digits(n) ((n) < 10 ? 3 : (int)(3 + log(n-1) / log(10.0)))
42 
43 
44  /* If lines look like getting too long while a tree is being
45  printed, subtrees are broken off and printed separately after
46  the main tree is finished */
47 
48 int SubTree, /* highest subtree to be printed */
49  SubSpace=0; /* maximum subtree encountered */
50 Tree *SubDef=Nil; /* pointers to subtrees */
51 Boolean LastBranch[Width]; /* whether printing last branch of subtree */
52 
53 
54 
55 /*************************************************************************/
56 /* */
57 /* Calculate the depth of nodes in a tree in Utility field */
58 /* */
59 /*************************************************************************/
60 
61 
62 void FindDepth(Tree T)
63 /* --------- */
64 {
65  float MaxDepth=0;
66  DiscrValue v;
67 
68  if ( T->NodeType )
69  {
70  ForEach(v, 1, T->Forks)
71  {
72  FindDepth(T->Branch[v]);
73  if ( T->Branch[v]->Utility > MaxDepth )
74  {
75  MaxDepth = T->Branch[v]->Utility;
76  }
77  }
78  }
79 
80  T->Utility = MaxDepth + 1;
81 }
82 
83 
84 
85 /*************************************************************************/
86 /* */
87 /* Display entire decision tree T */
88 /* */
89 /*************************************************************************/
90 
91 
92 void PrintTree(Tree T, String Title)
93 /* --------- */
94 {
95  int s;
96 
97  FindDepth(T);
98 
99  SubTree=0;
100  fprintf(Of, "\n%s\n", Title);
101  Show(T, 0);
102  fprintf(Of, "\n");
103 
104  ForEach(s, 1, SubTree)
105  {
106  fprintf(Of, T_Subtree, s);
107  Show(SubDef[s], 0);
108  fprintf(Of, "\n");
109  }
110 }
111 
112 
113 
114 /*************************************************************************/
115 /* */
116 /* Display the tree T with offset Sh */
117 /* */
118 /*************************************************************************/
119 
120 
121 void Show(Tree T, int Sh)
122 /* ---- */
123 {
124  DiscrValue v, MaxV, BrNo, Simplest, First;
125  CaseCount Errors=0.0;
126 
127  if ( T->NodeType )
128  {
129  /* See whether separate subtree needed */
130 
131  if ( Sh && Sh * TabSize + MaxLine(T) > Width )
132  {
133  if ( ++SubTree >= SubSpace )
134  {
135  SubSpace += 100;
136  if ( SubDef )
137  {
138  Realloc(SubDef, SubSpace, Tree);
139  }
140  else
141  {
142  SubDef = Alloc(SubSpace, Tree);
143  }
144  }
145 
146  SubDef[SubTree] = T;
147  fprintf(Of, " [S%d]", SubTree);
148  }
149  else
150  {
151  MaxV = T->Forks;
152 
153  /* Skip N/A branch if no cases */
154 
155  First = ( EmptyNA(T) ? 2 : 1 );
156  BrNo = First - 1;
157 
158  /* Print simplest branches first */
159 
160  while ( BrNo < MaxV )
161  {
162  Simplest = First;
163  ForEach(v, 2, MaxV)
164  {
165  if ( T->Branch[v]->Utility < T->Branch[Simplest]->Utility ||
166 
167  T->Branch[v]->Utility == 1 && ! T->Branch[v]->Cases )
168  {
169  Simplest = v;
170  }
171  }
172 
173  LastBranch[Sh+1] = ( ++BrNo == MaxV );
174  ShowBranch(Sh, T, Simplest, (int)( BrNo == First ));
175  T->Branch[Simplest]->Utility = 1E10;
176  }
177  }
178  }
179  else
180  {
181  fprintf(Of, " %s (%.8g", ClassName[T->Leaf], P1(T->Cases));
182  if ( T->Cases >= MinLeaf )
183  {
184  if ( (Errors = T->Cases - T->ClassDist[T->Leaf]) >= 0.05 )
185  {
186  fprintf(Of, "/%.8g", P1(Errors));
187  }
188  }
189  putc(')', Of);
190  }
191 }
192 
193 
194 
195 /*************************************************************************/
196 /* */
197 /* Print a node T with offset Sh, branch value v, and continue */
198 /* */
199 /*************************************************************************/
200 
201 
202 void ShowBranch(int Sh, Tree T, DiscrValue v, DiscrValue BrNo)
203 /* ---------- */
204 {
205  DiscrValue Pv, Last;
206  Attribute Att;
207  Boolean FirstValue;
208  int TextWidth, Skip, Values, i, Extra;
209  char CVS1[20], CVS2[20];
210 
211  Att = T->Tested;
212 
213  switch ( T->NodeType )
214  {
215  case BrDiscr:
216 
217  Indent(Sh, BrNo);
218 
219  fprintf(Of, "%s = %s:", AttName[Att], AttValName[Att][v]);
220 
221  break;
222 
223  case BrThresh:
224 
225  Indent(Sh, BrNo);
226 
227  fprintf(Of, "%s", AttName[Att]);
228 
229  if ( v == 1 )
230  {
231  fprintf(Of, " = N/A:");
232  }
233  else
234  if ( T->Lower != T->Upper )
235  {
236  if ( v == 2 )
237  {
238  CValToStr(T->Lower, Att, CVS1);
239  CValToStr(T->Mid , Att, CVS2);
240  fprintf(Of, " <= %s (%s):", CVS1, CVS2);
241  }
242  else
243  {
244  CValToStr(T->Upper, Att, CVS1);
245  CValToStr(T->Mid , Att, CVS2);
246  fprintf(Of, " >= %s (%s):", CVS1, CVS2);
247  }
248  }
249  else
250  {
251  CValToStr(T->Cut, Att, CVS1);
252  fprintf(Of, " %s %s:", ( v == 2 ? "<=" : ">" ), CVS1);
253  }
254 
255  break;
256 
257  case BrSubset:
258 
259  /* Count values at this branch */
260 
261  Values = Elements(Att, T->Subset[v], &Last);
262  if ( ! Values ) return;
263 
264  Indent(Sh, BrNo);
265 
266  if ( Values == 1 )
267  {
268  fprintf(Of, "%s = %s:", AttName[Att], AttValName[Att][Last]);
269  break;
270  }
271 
272  if ( Ordered(Att) )
273  {
274  /* Find first value */
275 
276  for ( Pv = 1 ; ! In(Pv, T->Subset[v]) ; Pv++ )
277  ;
278 
279  fprintf(Of, "%s %s [%s-%s]:", AttName[Att], T_InRange,
280  AttValName[Att][Pv], AttValName[Att][Last]);
281  break;
282  }
283 
284  fprintf(Of, "%s %s {", AttName[Att], T_ElementOf);
285  FirstValue = true;
286  Skip = CharWidth(AttName[Att]) + CharWidth(T_ElementOf) + 3;
287  TextWidth = Skip + Sh * TabSize;
288 
289  ForEach(Pv, 1, Last)
290  {
291  if ( In(Pv, T->Subset[v]) )
292  {
293  /* Find number of characters after this element */
294 
295  if ( Pv != Last || T->Branch[v]->NodeType )
296  {
297  Extra = 1; /* for ":" */
298  }
299  else
300  {
301  Extra = 2 /* for ": " */
302  + CharWidth(ClassName[T->Branch[v]->Leaf])
303  + 3 /* for " ()" */
304  + Digits(T->Cases)
305  + ( T->Errors < 0.05 ? 0 :
306  1 /* for "/" */
307  + Digits(T->Errors) );
308  }
309 
310  if ( ! FirstValue &&
311  TextWidth + CharWidth(AttValName[Att][Pv]) +
312  Extra + 1 > Width )
313  {
314  Indent(Sh, 0);
315  fprintf(Of, "%s",
316  ( LastBranch[Sh+1] && ! T->Branch[v]->NodeType ?
317  " " : ": " ));
318  ForEach(i, 5, Skip) putc(' ', Of);
319 
320  TextWidth = Skip + Sh * TabSize;
321  FirstValue = true;
322  }
323 
324  fprintf(Of, "%s%c",
325  AttValName[Att][Pv], Pv == Last ? '}' : ',');
326  TextWidth += CharWidth(AttValName[Att][Pv]) + 1;
327  FirstValue = false;
328  }
329  }
330  putc(':', Of);
331  }
332 
333  Show(T->Branch[v], Sh+1);
334 }
335 
336 
337 
338 /*************************************************************************/
339 /* */
340 /* Count the elements in a subset and record the last */
341 /* */
342 /*************************************************************************/
343 
344 
346 /* -------- */
347 {
348  DiscrValue Pv, Values=0;
349 
350  ForEach(Pv, 1, MaxAttVal[Att])
351  {
352  if ( In(Pv, S) )
353  {
354  *Last = Pv;
355  Values++;
356  }
357  }
358 
359  return Values;
360 }
361 
362 
363 
364 /*************************************************************************/
365 /* */
366 /* Find the approximate maximum single line size for non-leaf */
367 /* subtree T */
368 /* */
369 /*************************************************************************/
370 
371 
372 int MaxLine(Tree T)
373 /* ------- */
374 {
375  Attribute Att;
376  DiscrValue v, vv;
377  int Ll, One, MaxLl=0;
378 
379  Att = T->Tested;
380 
381  /* First find the max length of the line excluding tested att */
382 
383  ForEach(v, 1, T->Forks)
384  {
385  switch ( T->NodeType )
386  {
387  case BrThresh:
388  if ( TStampVal(Att) )
389  {
390  Ll = ( T->Lower != T->Upper ? 41 : 19 );
391  }
392  else
393  if ( DateVal(Att) )
394  {
395  Ll = ( T->Lower != T->Upper ? 23 : 10 );
396  }
397  else
398  if ( TimeVal(Att) )
399  {
400  Ll = ( T->Lower != T->Upper ? 19 : 8 );
401  }
402  else
403  {
404  Ll = ( T->Lower != T->Upper ? 11 : 4 );
405  }
406  break;
407 
408  case BrDiscr:
409  if ( Ordered(Att) )
410  {
411  vv = T->Cut;
412 
413  switch ( v )
414  {
415  case 1:
416  Ll = 3;
417  break;
418 
419  case 2:
420  Ll = CharWidth(AttValName[Att][2]);
421  if ( vv != 2 )
422  {
423  Ll += CharWidth(AttValName[Att][vv])+1;
424  }
425  break;
426 
427  case 3:
428  Ll = CharWidth(AttValName[Att][MaxAttVal[Att]]);
429  if ( vv != MaxAttVal[Att] - 1 )
430  {
431  Ll += CharWidth(AttValName[Att][vv+1])+1;
432  }
433  }
434  }
435  else
436  {
437  Ll = CharWidth(AttValName[Att][v]) + 1;
438  }
439  break;
440 
441  case BrSubset: /* difficult! */
442  Ll = 0;
443  ForEach(vv, 1, MaxAttVal[Att])
444  {
445  if ( In(vv,T->Subset[v]) )
446  {
447  One = CharWidth(AttValName[Att][vv]) + 6;
448  if ( One > Ll ) Ll = One;
449  }
450  }
451  }
452 
453  /* Check whether ends in leaf */
454 
455  if ( ! T->Branch[v]->NodeType &&
456  ( v > 1 || T->Branch[v]->Cases > 0.01 ) )
457  {
458  Ll += CharWidth(ClassName[T->Branch[v]->Leaf]) + 6;
459  }
460 
461  if ( Ll > MaxLl ) MaxLl = Ll;
462  }
463 
464  return CharWidth(AttName[Att]) + 4 + MaxLl;
465 }
466 
467 
468 
469 /*************************************************************************/
470 /* */
471 /* Indent Sh columns */
472 /* */
473 /*************************************************************************/
474 
475 
476 void Indent(int Sh, int BrNo)
477 /* ------ */
478 {
479  int i;
480 
481  fprintf(Of, "\n");
482  for ( i = 1 ; i <= Sh ; i++ )
483  {
484  fprintf(Of, "%s", ( i == Sh && BrNo == 1 ? ":..." :
485  LastBranch[i] ? " " : ": " ));
486  }
487 }
488 
489 
490 
491 /*************************************************************************/
492 /* */
493 /* Free up space taken up by tree T */
494 /* */
495 /*************************************************************************/
496 
497 
498 void FreeTree(Tree T)
499 /* -------- */
500 {
501  DiscrValue v;
502 
503  if ( ! T ) return;
504 
505  if ( T->NodeType )
506  {
507  ForEach(v, 1, T->Forks)
508  {
509  FreeTree(T->Branch[v]);
510  }
511 
512  Free(T->Branch);
513 
514  if ( T->NodeType == BrSubset )
515  {
516  FreeVector((void **) T->Subset, 1, T->Forks);
517  }
518 
519  }
520 
521  Free(T->ClassDist);
522  Free(T);
523 }
524 
525 
526 
527 /*************************************************************************/
528 /* */
529 /* Construct a leaf in a given node */
530 /* */
531 /*************************************************************************/
532 
533 
534 Tree Leaf(double *Freq, ClassNo NodeClass, CaseCount Cases, CaseCount Errors)
535 /* ---- */
536 {
537  Tree Node;
538  ClassNo c;
539 
540  Node = AllocZero(1, TreeRec);
541 
543  if ( Freq )
544  {
545  ForEach(c, 1, MaxClass)
546  {
547  Node->ClassDist[c] = Freq[c];
548  }
549  }
550 
551  Node->NodeType = 0;
552  Node->Leaf = NodeClass;
553  Node->Cases = Cases;
554  Node->Errors = Errors;
555 
556  return Node;
557 }
558 
559 
560 
561 /*************************************************************************/
562 /* */
563 /* Insert branches in a node */
564 /* */
565 /*************************************************************************/
566 
567 
568 void Sprout(Tree T, DiscrValue Branches)
569 /* ------ */
570 {
571  T->Forks = Branches;
572  T->Branch = AllocZero(Branches+1, Tree);
573 }
574 
575 
576 
577 /*************************************************************************/
578 /* */
579 /* Remove branches etc from a node */
580 /* */
581 /*************************************************************************/
582 
583 
584 void UnSprout(Tree T)
585 /* -------- */
586 {
587  DiscrValue v;
588 
589  ForEach(v, 1, T->Forks)
590  {
591  FreeTree(T->Branch[v]);
592  }
593  Free(T->Branch); T->Branch = Nil;
594 
595  if ( T->NodeType == BrSubset )
596  {
597  FreeVector((void **) T->Subset, 1, T->Forks); T->Subset = Nil;
598  }
599 
600  T->Forks = T->NodeType = 0;
601 }
602 
603 
604 
605 /*************************************************************************/
606 /* */
607 /* Count the non-null leaves in a tree */
608 /* */
609 /*************************************************************************/
610 
611 
613 /* -------- */
614 {
615  int Sum=0;
616  DiscrValue v;
617 
618  if ( T->NodeType )
619  {
620  ForEach(v, ( EmptyNA(T) ? 2 : 1 ), T->Forks)
621  {
622  Sum += TreeSize(T->Branch[v]);
623  }
624 
625  return Sum;
626  }
627 
628  return ( T->Cases >= MinLeaf ? 1 : 0 );
629 }
630 
631 
632 
633 /*************************************************************************/
634 /* */
635 /* Count the non-null leaves in a tree that may contain */
636 /* compressed branches via CompressBranches() */
637 /* */
638 /*************************************************************************/
639 
640 
642 /* ----------------- */
643 {
644  int Sum=0;
645  DiscrValue v, Dummy;
646 
647  if ( ! T->NodeType )
648  {
649  return 1;
650  }
651 
652  ForEach(v, 1, T->Forks)
653  {
654  if ( T->Branch[v]->Cases < MinLeaf ) continue;
655 
656  if ( T->NodeType == BrSubset && ! T->Branch[v]->NodeType )
657  {
658  Sum += Elements(T->Tested, T->Subset[v], &Dummy);
659  }
660  else
661  {
662  Sum += ExpandedLeafCount(T->Branch[v]);
663  }
664  }
665 
666  return Sum;
667 }
668 
669 
670 
671 /*************************************************************************/
672 /* */
673 /* Find the maximum depth of a tree */
674 /* */
675 /*************************************************************************/
676 
677 
679 /* --------- */
680 {
681  DiscrValue v;
682  int Subtree, MaxSubtree=0;
683 
684  if ( T->NodeType )
685  {
686  ForEach(v, 1, T->Forks)
687  {
688  Subtree = TreeDepth(T->Branch[v]);
689  if ( Subtree > MaxSubtree ) MaxSubtree = Subtree;
690  }
691  }
692 
693  return MaxSubtree + 1;
694 }
695 
696 
697 
698 /*************************************************************************/
699 /* */
700 /* Return a copy of tree T */
701 /* */
702 /*************************************************************************/
703 
704 
706 /* -------- */
707 {
708  DiscrValue v;
709  Tree New;
710  int Bytes;
711 
712  New = Alloc(1, TreeRec);
713  memcpy(New, T, sizeof(TreeRec));
714 
715  New->ClassDist = Alloc(MaxClass+1, CaseCount);
716  memcpy(New->ClassDist, T->ClassDist, (MaxClass + 1) * sizeof(CaseCount));
717 
718  if ( T->NodeType == BrSubset )
719  {
720  Bytes = (MaxAttVal[T->Tested]>>3) + 1;
721 
722  New->Subset = Alloc(T->Forks+1, Set);
723  ForEach(v, 1, T->Forks)
724  {
725  New->Subset[v] = Alloc(Bytes, unsigned char);
726  memcpy(New->Subset[v], T->Subset[v], Bytes);
727  }
728  }
729 
730  if ( T->NodeType )
731  {
732  New->Branch = AllocZero(T->Forks+1, Tree);
733  ForEach(v, 1, T->Forks)
734  {
735  New->Branch[v] = CopyTree(T->Branch[v]);
736  }
737  }
738 
739  return New;
740 }