49 #define SAMPLEUNIT 2000
254 if (
GEnv.SubsetInfo )
286 if (
Now == WINNOWATTS )
343 ClassNo c, BestLeaf=1, Least=1;
381 fprintf(
Of,
"\n<%d> %d cases", Level, No(Fp,Lp));
382 if ( fabs(No(Fp,Lp) - Cases) >= 0.1 )
384 fprintf(
Of,
", total weight %.1f", Cases);
392 if (
ClassFreq[BestLeaf] >= 0.999 * Cases ||
423 if ( BestAtt ==
None )
425 Verbosity(1, fprintf(
Of,
"\tno sensible splits\n"))
431 fprintf(
Of,
"\tbest attribute %s",
AttName[BestAtt]);
434 fprintf(
Of,
" cut %.3f",
Bar[BestAtt]);
436 fprintf(
Of,
" inf %.3f gain %.3f val %.3f\n",
461 Divide(Node, Fp, Lp, Level);
472 if ( TreeErrs >= 0.999 * Node->
Errors )
475 fprintf(
Of,
"<%d> Collapse tree for %d cases to leaf %s\n",
525 Sample(Fp, Lp, SampleSize);
526 SLp = Fp + SampleSize - 1;
530 NewCases = CountCases(Fp, SLp);
562 Interval = No(Fp, Lp) / (double) N;
566 j = (i + 0.5) * Interval;
568 assert(j >= 0 && Fp + j <= Lp);
570 Swap(Fp + i, Fp + j);
601 for ( Att =
MaxAtt ; Att > 0 ; Att-- )
614 for ( j =
NWaiting-1 ; j >= i ; j-- )
633 for ( Att =
MaxAtt ; Att > 0 ; Att-- )
708 double BestVal,
Val, MinGain=1E6, AvGain=0, MDL;
731 if ( ! Possible )
return None;
734 MDL = Log(Possible) / Cases;
738 fprintf(
Of,
"\tav gain=%.3f, MDL (%d) = %.3f, min=%.3f\n",
739 AvGain, Possible, MDL, MinGain))
749 if (
Gain[Att] >= 0.999 * MinGain &&
Info[Att] > 0 )
755 if ( Val > BestVal ||
756 Val > 0.999 * BestVal &&
758 NBr == BestNBr &&
Gain[Att] >
Gain[BestAtt] ) )
791 NBr = (
GEnv.ValFreq[1] > 0.5 ? 3 : 2 );
807 if (
GEnv.ValFreq[v] > 0.5 ) NBr++;
820 fprintf(
Of,
"\t(cancelled -- %d leaves, max %d)\n", NBr,
MaxLeaves))
838 CaseNo Bp, Ep, Missing, Cases, i;
839 CaseCount KnownCases, MissingCases, BranchCases;
848 Missing = (Ep =
Group(0, Fp, Lp, T)) - Fp + 1;
850 KnownCases = T->
Cases - (MissingCases = CountCases(Fp, Ep));
867 if ( (Cases = No(Fp,Lp)) > 1000 &&
868 Missing > 0.5 * Cases &&
889 Ep =
Group(v, Bp + Missing, Lp, T);
891 assert(Bp + Missing <= Lp+1 && Ep <= Lp);
896 BranchCases = CountCases(Bp + Missing, Ep);
898 Factor = ( ! Missing ? 0 :
902 if ( BranchCases + Factor * MissingCases >= MinLeaf )
908 ForEach(i, Bp, Bp + Missing - 1)
920 for ( i = Ep ; i >= Bp ; i-- )
1002 Thresh = TestNode->
Cut;
1006 (
CVal(
Case[i], Att) <= Thresh) == (V == 2) )
1016 SS = TestNode->
Subset[V];
1137 for ( a = 0 ; a <
NDList ; a++ )
1152 for ( a = 0 ; a <
NDList ; a++ )