37 #define PartInfo(n) (-(n)*Log((n)/GEnv.Cases))
54 CaseNo i, j, BestI, Tries=0;
55 double LowInfo, LHInfo, LeastInfo=1E38,
56 w, BestGain, BestInfo, ThreshCost=1;
60 Verbosity(3, fprintf(
Of,
"\tAtt %s\n",
AttName[Att]))
70 fprintf(
Of,
"\tAtt %s\tinsufficient cases with known values\n",
84 if (
GEnv.MinSplit > 25 )
GEnv.MinSplit = 25;
93 for ( ; i <=
GEnv.Ep ; i++ )
100 GEnv.Freq[2][c] += w;
101 GEnv.Freq[3][c] -= w;
114 if (
GEnv.SRec[j].C !=
GEnv.HighClass )
GEnv.HighClass = 0;
124 if ( LowInfo >= LeastInfo )
126 for ( i++ ; i <=
GEnv.Ep ; i++ )
128 if (
GEnv.SRec[i+1].V >
GEnv.SRec[i].V )
137 if ( LHInfo < LeastInfo )
142 BestInfo = (
GEnv.FixedSplitInfo
150 fprintf(
Of,
"\t\tCut at %.3f (gain %.3f):",
152 (1 -
GEnv.UnknownRate) *
153 (
GEnv.BaseInfo - (
GEnv.NAInfo + LHInfo) /
GEnv.KnownCases));
162 BestGain = (1 -
GEnv.UnknownRate) *
163 (
GEnv.BaseInfo - (
GEnv.NAInfo + LeastInfo) /
GEnv.KnownCases);
171 (
GEnv.SRec[BestI+1].V -
GEnv.SRec[BestI].V);
172 ThreshCost = ( Interval < Tries ? Log(Interval) : Log(Tries) )
176 BestGain -= ThreshCost;
183 Verbosity(2, fprintf(
Of,
"\tAtt %s\tno gain\n",
AttName[Att]))
187 Gain[Att] = BestGain;
188 Info[Att] = BestInfo;
191 GEnv.HighVal =
GEnv.SRec[BestI+1].V;
203 fprintf(
Of,
"\tAtt %s\tcut=%.3f, inf %.3f, gain %.3f\n",
222 double LHInfo, w, SplitInfo, ThisGain, GR;
242 if (
GEnv.MinSplit > 25 )
GEnv.MinSplit = 25;
245 GEnv.MinSplit *= SampleFrac * 0.33;
251 for ( ; i <=
GEnv.Ep ; i++ )
258 GEnv.Freq[2][c] += w;
259 GEnv.Freq[3][c] -= w;
270 if (
GEnv.SRec[j].C !=
GEnv.HighClass )
GEnv.HighClass = 0;
278 SplitInfo = (
GEnv.FixedSplitInfo
282 ThisGain = (1 -
GEnv.UnknownRate) *
283 (
GEnv.BaseInfo - (
GEnv.NAInfo + LHInfo) /
GEnv.KnownCases);
284 if ( ThisGain >
Gain[Att] )
Gain[Att] = ThisGain;
288 GR = (ThisGain + 1E-5) / SplitInfo;
296 fprintf(
Of,
"\t\tCut at %.3f (gain %.3f):",
297 (
GEnv.LowVal +
GEnv.HighVal) / 2, ThisGain);
307 fprintf(
Of,
"\tAtt %s: max GR estimate %.3f\n",
401 GEnv.NAInfo =
GEnv.FixedSplitInfo = 0;
413 if (
GEnv.ValFreq[0] > 0 || SampleFrac < 1 )
460 (
GEnv.LowCases +
GEnv.SRec[i].W <
GEnv.MinSplit - 1E-5 ||
461 GEnv.SRec[i].V ==
GEnv.SRec[i+1].V ) ;
469 GEnv.Freq[2][c] += w;
470 GEnv.Freq[3][c] -= w;
476 for ( j = i-1;
GEnv.HighClass && j >=
GEnv.Xp ; j-- )
478 if (
GEnv.SRec[j].C !=
GEnv.HighClass )
GEnv.HighClass = 0;
562 if ( (&
GEnv)->SRec[i].V != (&
GEnv)->SRec[i-1].V ) Cuts++;
600 Mid = (Low + High + 1) / 2;
602 if ( (&
GEnv)->SRec[Mid].V > Th )
612 return (&
GEnv)->SRec[Low].V;