mageec  0.1.0
MAchine Guided Energy Efficient Compilation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
info.c
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Copyright 2010 Rulequest Research Pty Ltd. */
4 /* */
5 /* This file is part of C5.0 GPL Edition, a single-threaded version */
6 /* of C5.0 release 2.07. */
7 /* */
8 /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9 /* modify it under the terms of the GNU General Public License as */
10 /* published by the Free Software Foundation, either version 3 of the */
11 /* License, or (at your option) any later version. */
12 /* */
13 /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16 /* General Public License for more details. */
17 /* */
18 /* You should have received a copy of the GNU General Public License */
19 /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20 /* */
21 /* <http://www.gnu.org/licenses/>. */
22 /* */
23 /*************************************************************************/
24 
25 
26 
27 /*************************************************************************/
28 /* */
29 /* Calculate information, information gain, and print dists */
30 /* -------------------------------------------------------- */
31 /* */
32 /*************************************************************************/
33 
34 
35 #include "defns.i"
36 #include "extern.i"
37 
38 
39 /*************************************************************************/
40 /* */
41 /* Given Freq[][] and ValFreq[], compute the information gain. */
42 /* */
43 /*************************************************************************/
44 
45 
46 double ComputeGain(double BaseInfo, float UnknFrac, DiscrValue MaxVal,
47  CaseCount TotalCases)
48 /* ----------- */
49 {
50  DiscrValue v;
51  double ThisInfo=0.0;
52 
53  /* Check whether all values are unknown or the same */
54 
55  if ( ! TotalCases ) return None;
56 
57  /* Compute total info after split, by summing the
58  info of each of the subsets formed by the test */
59 
60  ForEach(v, 1, MaxVal)
61  {
62  ThisInfo += TotalInfo(GEnv.Freq[v], 1, MaxClass);
63  }
64  ThisInfo /= TotalCases;
65 
66  /* Set the gain in information for all cases, adjusted for unknowns */
67 
68  return ( BaseInfo <= ThisInfo ? 0.0 :
69  (1 - UnknFrac) * (BaseInfo - ThisInfo) );
70 
71 }
72 
73 
74 
75 /*************************************************************************/
76 /* */
77 /* Compute the total information in V[ MinVal..MaxVal ] */
78 /* */
79 /*************************************************************************/
80 
81 
82 double TotalInfo(double V[], DiscrValue MinVal, DiscrValue MaxVal)
83 /* --------- */
84 {
85  DiscrValue v;
86  double Sum=0.0, TotalCases=0;
87  CaseCount N;
88 
89  ForEach(v, MinVal, MaxVal)
90  {
91  N = V[v];
92 
93  Sum += N * Log(N);
94  TotalCases += N;
95  }
96 
97  return TotalCases * Log(TotalCases) - Sum;
98 }
99 
100 
101 
102 /*************************************************************************/
103 /* */
104 /* Print distribution table for given attribute */
105 /* */
106 /*************************************************************************/
107 
108 
110  double **Freq, double *ValFreq, Boolean ShowNames)
111 /* ----------------- */
112 {
113  DiscrValue v;
114  ClassNo c;
115  String Val;
116 
117  fprintf(Of, "\n\t\t\t ");
118  ForEach(c, 1, MaxClass)
119  {
120  fprintf(Of, "%7.6s", ClassName[c]);
121  }
122  fprintf(Of, "\n");
123 
124  ForEach(v, MinVal, MaxVal)
125  {
126  if ( ShowNames )
127  {
128  Val = ( ! v ? "unknown" :
129  MaxAttVal[Att] ? AttValName[Att][v] :
130  v == 1 ? "N/A" :
131  v == 2 ? "below" : "above" );
132  fprintf(Of, "\t\t[%-7.7s:", Val);
133  }
134  else
135  {
136  fprintf(Of, "\t\t[%-7d:", v);
137  }
138 
139  ForEach(c, 1, MaxClass)
140  {
141  fprintf(Of, " %6.1f", Freq[v][c]);
142  }
143 
144  fprintf(Of, "]\n");
145  }
146 }