13 #define MAXLINEBUFFER 10000
55 while ( c != ColonOpt && c !=
',' && c !=
'\n' && c !=
'|' && c != EOF )
64 if ( (c =
InChar(f)) ==
'|' ||
Space(c) || c == EOF )
break;
78 while ( ( c =
InChar(f) ) ==
' ' || c ==
'\t' )
104 while ( Sp > s &&
Space(*(Sp-1)) ) Sp--;
108 Msg[0] = (
Space(c) ?
'.' : c );
142 char Buffer[1000]=
"", *EndBuff;
143 int AttCeiling=100, ClassCeiling=100;
192 if ( EndBuff == Buffer || *EndBuff !=
'\0' )
215 while (
ReadName(Nf, Buffer, 1000,
':') )
224 if ( ( *Buffer ==
'a' || *Buffer ==
'A' ) &&
225 ! memcmp(Buffer+1,
"ttributes ", 10) &&
226 ! memcmp(Buffer+strlen(Buffer)-6,
"cluded", 6) )
228 AttExIn = ( ! memcmp(Buffer+strlen(Buffer)-8,
"in", 2) ? 1 : -1 );
237 while (
ReadName(Nf, Buffer, 1000,
':') )
263 if ( ++
MaxAtt >= AttCeiling )
352 sprintf(Buffer,
"%g < %s <= %g",
357 sprintf(Buffer,
"%s > %g",
397 char Buffer[1000]=
"", *p;
399 int ValCeiling=100, BaseYear;
404 if ( ! (
ReadName(Nf, Buffer, 1000,
':') ) )
415 if ( ! strcmp(Buffer,
"continuous") )
419 if ( ! strcmp(Buffer,
"timestamp") )
428 BaseYear = gmtime(&clock)->tm_year + 1900;
433 if ( ! strcmp(Buffer,
"date") )
438 if ( ! strcmp(Buffer,
"time") )
443 if ( ! memcmp(Buffer,
"discrete", 8) )
449 v = atoi(&Buffer[8]);
460 if ( ! strcmp(Buffer,
"ignore") )
465 if ( ! strcmp(Buffer,
"label") )
498 if ( ! memcmp(Buffer,
"[ordered]", 9) )
502 for ( p = Buffer+9 ;
Space(*p) ; p++ )
514 if ( ! (
ReadName(Nf, Buffer, 1000,
':') ) )
519 if ( ++MaxAttVal[MaxAtt] >= ValCeiling )
531 if (
Ordered(MaxAtt) && MaxAttVal[MaxAtt] <= 3 )
552 while ( n <= Last && strcmp(Val, List[n]) ) n++;
554 return ( n <= Last ? n : First-1 );
600 #define XError(a,b,c) Error(a,b,c)
607 char Name[1000], *EndName;
614 if (
ReadName(Df, Name, 1000,
'\00') )
640 if ( ! FirstValue && !
ReadName(Df, Name, 1000,
'\00') )
658 if ( ! strcmp(Name,
"?") )
666 if ( Att !=
ClassAtt && ! strcmp(Name,
"N/A") )
713 DVal(DVec, Att) = Dv;
750 CVal(DVec, Att) = strtod(Name, &EndName);
751 if ( EndName == Name || *EndName !=
'\0' )
779 for ( Dv = 1 ; Dv < MaxClass && Cv >
ClassThresh[Dv] ; Dv++ )
787 if ( !
ReadName(Df, Name, 1000,
'\00') )
854 Cv =
CVal(DVec, Att);
886 #define FailSyn(Msg) {DefSyntaxError(Msg); return false;}
887 #define FailSem(Msg) {DefSemanticsError(Fi, Msg, OpCode); return false;}
897 #define cval _cont_val
898 #define sval _string_val
899 #define dval _discr_val
950 strcmp(
AttName[MaxAtt],
"case weight") )
955 if ( TStack[0].Type ==
'B' )
1010 if ( c == EOF || c ==
'\n' && LastWasPeriod )
1015 if ( ! LastWasPeriod )
Append(
'.');
1034 LastWasPeriod = ( c ==
'.' );
1052 if ( c ==
' ' && (!
BN ||
Buff[
BN-1] ==
' ' ) )
return;
1085 while (
Find(
"or") )
1106 while (
Find(
"and") )
1158 if ( (o =
FindOne(AddOps)) >= 0 )
1167 while ( (o =
FindOne(AddOps)) >= 0 )
1190 while ( (o =
FindOne(MultOps)) >= 0 )
1243 FailSyn(
"attribute, value, or '('");
1254 char *EndPtr, *Str, Date[11], Time[9];
1255 int o, FirstBN, Fi=
BN;
1264 while (
Buff[
BN] !=
'"' )
1273 Str = strdup(
Buff + FirstBN);
1290 if ( (
Buff[BN+4] ==
'/' &&
Buff[BN+7] ==
'/' ||
1291 Buff[BN+4] ==
'-' &&
Buff[BN+7] ==
'-' )&&
1292 isdigit(
Buff[BN+1]) && isdigit(
Buff[BN+2]) &&
1293 isdigit(
Buff[BN+3]) &&
1294 isdigit(
Buff[BN+5]) && isdigit(
Buff[BN+6]) &&
1295 isdigit(
Buff[BN+8]) && isdigit(
Buff[BN+9]) )
1297 memcpy(Date,
Buff+BN, 10);
1307 if (
Buff[BN+2] ==
':' &&
Buff[BN+5] ==
':' &&
1308 isdigit(
Buff[BN+1]) &&
1309 isdigit(
Buff[BN+3]) && isdigit(
Buff[BN+4]) &&
1310 isdigit(
Buff[BN+6]) && isdigit(
Buff[BN+7]) )
1312 memcpy(Time,
Buff+BN, 8);
1323 F = strtod(
Buff+BN, &EndPtr);
1327 if ( EndPtr >
Buff+BN+1 && *(EndPtr-1) ==
'.' )
1338 if ( (o =
FindOne(Funcs)) >= 0 )
1342 if ( !
Find(
"(") )
FailSyn(
"'(' after function name");
1347 if ( !
Find(
")") )
FailSyn(
"')' after function argument");
1353 if (
Buff[BN] ==
'?' )
1356 if ( TStack[
TSN-1].Type ==
'N' )
1366 if ( ! memcmp(
Buff+BN,
"N/A", 3) )
1369 if ( TStack[
TSN-1].Type ==
'N' )
1400 return ( !
Buff[
BN] ?
false : ! memcmp(
Buff+
BN, S, strlen(S)) );
1417 for ( a = 0 ; Alt[a] ; a++ )
1419 if (
Find(Alt[a]) )
return a;
1443 if ( ! LongestAtt ||
1488 for ( ; (RestOfText[i] & 0x80) ; i++)
1491 RestOfText[i] = RestOfText[i+1] =
'.';
1504 char Exp[1000], XMsg[1000], Op[1000];
1512 sprintf(Exp,
"%.10s...%.10s",
Buff+Fi,
Buff+
BN-10);
1516 sprintf(Exp,
"%.*s",
BN - Fi,
Buff+Fi);
1521 case OP_AND: sprintf(Op,
"%s",
"and");
break;
1522 case OP_OR: sprintf(Op,
"%s",
"or");
break;
1524 case OP_EQ: sprintf(Op,
"%s",
"=");
break;
1526 case OP_NE: sprintf(Op,
"%s",
"<>");
break;
1527 case OP_GT: sprintf(Op,
"%s",
">");
break;
1528 case OP_GE: sprintf(Op,
"%s",
">=");
break;
1529 case OP_LT: sprintf(Op,
"%s",
"<");
break;
1530 case OP_LE: sprintf(Op,
"%s",
"<=");
break;
1531 case OP_PLUS: sprintf(Op,
"%s",
"+");
break;
1532 case OP_MINUS: sprintf(Op,
"%s",
"-");
break;
1533 case OP_UMINUS: sprintf(Op,
"%s",
"unary -");
break;
1534 case OP_MULT: sprintf(Op,
"%s",
"*");
break;
1535 case OP_DIV: sprintf(Op,
"%s",
"/");
break;
1536 case OP_MOD: sprintf(Op,
"%s",
"%");
break;
1537 case OP_POW: sprintf(Op,
"%s",
"^");
break;
1538 case OP_SIN: sprintf(Op,
"%s",
"sin");
break;
1539 case OP_COS: sprintf(Op,
"%s",
"cos");
break;
1540 case OP_TAN: sprintf(Op,
"%s",
"tan");
break;
1541 case OP_LOG: sprintf(Op,
"%s",
"log");
break;
1542 case OP_EXP: sprintf(Op,
"%s",
"exp");
break;
1543 case OP_INT: sprintf(Op,
"%s",
"int");
1546 sprintf(XMsg,
"%s with '%s'", Msg, Op);
1566 if (
Buff[Fi] ==
' ' ) Fi++;
1624 if ( TStack[
TSN-2].Type !=
'B' || TStack[
TSN-1].Type !=
'B' )
1633 if ( TStack[
TSN-2].Type != TStack[
TSN-1].Type )
1635 FailSem(
"incompatible values");
1645 if ( TStack[
TSN-2].Type !=
'N' || TStack[
TSN-1].Type !=
'N' )
1647 FailSem(
"non-arithmetic value");
1655 if ( TStack[
TSN-2].Type !=
'S' || TStack[
TSN-1].Type !=
'S' )
1657 FailSem(
"incompatible values");
1669 if ( TStack[
TSN-2].Type !=
'N' || TStack[
TSN-1].Type !=
'N' )
1671 FailSem(
"non-arithmetic value");
1677 if ( TStack[
TSN-1].Type !=
'N' )
1679 FailSem(
"non-arithmetic value");
1690 if ( TStack[
TSN-1].Type !=
'N' )
1692 FailSem(
"non-arithmetic argument");
1697 TStack[
TSN].
Fi = Fi;
1712 #define CUnknownVal(AV) (AV.cval==_UNK.cval)
1713 #define DUnknownVal(AV) (AV.dval==_UNK.dval)
1714 #define DUNA(a) (DUnknownVal(XStack[a]) || NotApplicVal(XStack[a]))
1715 #define CUNA(a) (CUnknownVal(XStack[a]) || NotApplicVal(XStack[a]))
1716 #define C1(x) (CUNA(XSN-1) ? _UNK.cval : (x))
1717 #define C2(x) (CUNA(XSN-1) || CUNA(XSN-2) ? _UNK.cval : (x))
1718 #define CD2(x) (CUNA(XSN-1) || CUNA(XSN-2) ? _UNK.dval : (x))
1719 #define D2(x) (DUNA(XSN-1) || DUNA(XSN-2) ? _UNK.dval : (x))
1726 int XSN=0,
DN, bv1, bv2, Mult;
1733 for (
DN = 0 ; ;
DN++)
1735 switch (
DefOp((DElt = D[
DN])) )
1742 XStack[XSN++].cval =
CVal(Case, Att);
1746 XStack[XSN++].sval =
1753 XStack[XSN++].cval =
DefNVal(DElt);
1757 XStack[XSN++].sval =
DefSVal(DElt);
1761 bv1 = XStack[XSN-2].dval;
1762 bv2 = XStack[XSN-1].dval;
1763 XStack[XSN-2].dval = ( bv1 == 3 || bv2 == 3 ? 3 :
1764 D2(bv1 == 2 && bv2 == 2 ? 2 : 3) );
1769 bv1 = XStack[XSN-2].dval;
1770 bv2 = XStack[XSN-1].dval;
1771 XStack[XSN-2].dval = ( bv1 == 2 || bv2 == 2 ? 2 :
1772 D2(bv1 == 2 || bv2 == 2 ? 2 : 3) );
1777 cv1 = XStack[XSN-2].cval;
1778 cv2 = XStack[XSN-1].cval;
1779 XStack[XSN-2].dval = ( cv1 == cv2 ? 2 : 3 );
1784 cv1 = XStack[XSN-2].cval;
1785 cv2 = XStack[XSN-1].cval;
1786 XStack[XSN-2].dval = ( cv1 != cv2 ? 2 : 3 );
1791 cv1 = XStack[XSN-2].cval;
1792 cv2 = XStack[XSN-1].cval;
1793 XStack[XSN-2].dval =
CD2(cv1 > cv2 ? 2 : 3);
1798 cv1 = XStack[XSN-2].cval;
1799 cv2 = XStack[XSN-1].cval;
1800 XStack[XSN-2].dval =
CD2(cv1 >= cv2 ? 2 : 3);
1805 cv1 = XStack[XSN-2].cval;
1806 cv2 = XStack[XSN-1].cval;
1807 XStack[XSN-2].dval =
CD2(cv1 < cv2 ? 2 : 3);
1812 cv1 = XStack[XSN-2].cval;
1813 cv2 = XStack[XSN-1].cval;
1814 XStack[XSN-2].dval =
CD2(cv1 <= cv2 ? 2 : 3);
1819 sv1 = XStack[XSN-2].sval;
1820 sv2 = XStack[XSN-1].sval;
1821 XStack[XSN-2].dval =
1822 ( ! sv1 && ! sv2 ? 2 :
1823 ! sv1 || ! sv2 ? 3 :
1824 ! strcmp(sv1, sv2) ? 2 : 3 );
1829 sv1 = XStack[XSN-2].sval;
1830 sv2 = XStack[XSN-1].sval;
1831 XStack[XSN-2].dval =
1832 ( ! sv1 && ! sv2 ? 3 :
1833 ! sv1 || ! sv2 ? 2 :
1834 strcmp(sv1, sv2) ? 2 : 3 );
1839 cv1 = XStack[XSN-2].cval;
1840 cv2 = XStack[XSN-1].cval;
1841 XStack[XSN-2].cval =
C2(cv1 + cv2);
1846 cv1 = XStack[XSN-2].cval;
1847 cv2 = XStack[XSN-1].cval;
1848 XStack[XSN-2].cval =
C2(cv1 - cv2);
1853 cv1 = XStack[XSN-2].cval;
1854 cv2 = XStack[XSN-1].cval;
1855 XStack[XSN-2].cval =
C2(cv1 * cv2);
1862 cv1 = XStack[XSN-2].cval;
1863 cv2 = XStack[XSN-1].cval;
1870 XStack[XSN-2].cval = _UNK.cval;
1876 while ( fabs(cv2) > 1 )
1881 XStack[XSN-2].cval = rint(cv1 * Mult) / Mult;
1887 cv1 = XStack[XSN-2].cval;
1888 cv2 = XStack[XSN-1].cval;
1889 XStack[XSN-2].cval =
C2(fmod(cv1, cv2));
1894 cv1 = XStack[XSN-2].cval;
1895 cv2 = XStack[XSN-1].cval;
1896 XStack[XSN-2].cval =
1898 ( cv1 < 0 && ceil(cv2) != cv2 ) ? _UNK.cval :
1904 cv1 = XStack[XSN-1].cval;
1905 XStack[XSN-1].cval =
C1(-cv1);
1909 cv1 = XStack[XSN-1].cval;
1910 XStack[XSN-1].cval =
C1(sin(cv1));
1914 cv1 = XStack[XSN-1].cval;
1915 XStack[XSN-1].cval =
C1(cos(cv1));
1919 cv1 = XStack[XSN-1].cval;
1920 XStack[XSN-1].cval =
C1(tan(cv1));
1924 cv1 = XStack[XSN-1].cval;
1925 XStack[XSN-1].cval =
1926 (
CUNA(XSN-1) || cv1 <= 0 ? _UNK.cval : log(cv1) );
1930 cv1 = XStack[XSN-1].cval;
1931 XStack[XSN-1].cval =
C1(exp(cv1));
1935 cv1 = XStack[XSN-1].cval;
1936 XStack[XSN-1].cval =
C1(rint(cv1));
1940 ReturnVal.cval = XStack[0].cval;
2027 #if defined WIN32 || defined _CONSOLE
2051 int Year, Month, Day;
2065 "%d-%d-%d\"", &Year, &Month, &Day) == 3 )
2074 if ( (F =
GetFile(
".costs",
"r")) )
2099 for ( p += strlen(p) ; *p !=
'"' ; p++ )
2102 if ( *p ==
',' ) p++;
2173 sscanf(
PropVal,
"\"%lf\"", &XD); T->
Cut = XD;
2211 while ( Delim ==
' ' );
2282 while ( Delim ==
' ' );
2332 sscanf(
PropVal,
"\"%f\"", &Lift);
2343 while ( Delim ==
' ' );
2389 sscanf(
PropVal,
"\"%lf\"", &XD); C->
Cut = XD;
2417 while ( Delim ==
' ' );
2509 if ( NTest >= TestSpace )
2525 int FP=0, ri,
TI, *Expect, LRN;
2528 if ( ! RRN )
return Nil;
2559 if ( ! RRN )
return Node;
2564 TestUsed[
TI] =
true;
2570 Expect =
Alloc(RRN,
int);
2596 if ( abs(Expect[ri]) == v )
2619 TestUsed[
TI] =
false;
2659 if ( R->
Lhs[c]->
Cut < Test[TI]->
Cut )
return -2;
2663 if ( R->
Lhs[c]->
Cut > Test[TI]->
Cut )
return -3;
2726 for ( p = PropName ; (c = fgetc(
TRf)) !=
'=' ; )
2728 if ( p - PropName >= 19 || c == EOF )
2731 PropName[0] =
PropVal[0] = *Delim =
'\00';
2738 for ( p =
PropVal ; ((c = fgetc(
TRf)) !=
' ' && c !=
'\n') || Quote ; )
2743 PropName[0] =
PropVal[0] =
'\00';
2747 if ( (i = p -
PropVal) >= PropValSize )
2778 for ( S++ ; *S !=
'"' ; S++ )
2780 if ( *S ==
'\\' ) S++;
2808 for ( p += strlen(p) ; *p !=
'"' ; p++ )
2811 if ( *p ==
',' ) p++;
2844 Node->
Leaf = NodeClass;
2845 Node->
Cases = Cases;
2870 while (
ReadName(Cf, Name, 1000,
':') )
2877 if ( !
ReadName(Cf, Name, 1000,
':') ||
2883 if ( !
ReadName(Cf, Name, 1000,
':') ||
2884 sscanf(Name,
"%f", &Val) != 1 || Val < 0 )
2890 if ( Pred > 0 && Real > 0 && Pred != Real && Val != 1 )
2904 MCost[p][r] = ( p == r ? 0.0 : 1.0 );
2941 #if defined WIN32 || defined PREDICT
3001 double NewFrac, BrWt[4];
3024 Prob[0] += Fraction * T->
Cases;
3032 if ( Dv <= T->Forks )
3060 BrWt[3] = 1 - BrWt[2];
3064 if ( (NewFrac = Fraction * BrWt[v]) >= 1E-6 )
3113 double TotWeight=0,
TotVote=0;
3163 TotWeight += 1000.0;
3191 E->
ClassWt[Best] / TotWeight);
3193 #if defined WIN32 || defined PREDICT
3199 if ( c != Best && E->
ClassWt[c] > 0 )
3234 v =
XDVal(Case, Att);
3235 Outcome = ( v == 0 ? -1 : v );
3240 Outcome = (
Unknown(Case, Att) ? -1 :
3242 CVal(Case, Att) <= OneCond->
Cut ? 2 : 3 );
3247 v =
XDVal(Case, Att);
3335 for ( ri = 0 ; (r = RT->
Fire[ri]) ; ri++ )
3341 if ( ! RT->
Branch )
return;
3414 double ExpCost, BestCost=1E10;
3422 if ( ! Prob[c] )
continue;
3426 if ( ExpCost < BestCost )
3437 if ( Prob[c] > Prob[BestClass] ) BestClass = c;
3464 ExpCost += LocalFreq[c] *
MCost[C][c];
3502 return ( Val <= T->Lower ? 1.0 :
3503 Val >= T->
Upper ? 0.0 :
3522 strcat(
Fn, Extension);
3523 return fopen(
Fn, RW);
3539 static char *LastExt=
"";
3541 if ( !
TRf || strcmp(LastExt, Extension) )
3543 LastExt = Extension;
3572 if ( OptNo >= Argc )
return '\00';
3574 if ( *(Option = Argv[OptNo++]) !=
'-' )
return '?';
3576 for ( i = 0 ; Options[i] ; i++ )
3578 if ( Options[i] == Option[1] )
3580 OptArg = (
char *) ( Options[i+1] !=
'+' ?
Nil :
3581 Option[2] ? Option+2 :
3582 OptNo < Argc ? Argv[OptNo++] :
"0" );
3605 if ( ! Bytes || (p = (
void *) malloc(Bytes)) )
3624 if ( ! Bytes )
return Nil;
3626 if ( ! Present )
return Pmalloc(Bytes);
3628 if ( (p = (
void *) realloc(Present, Bytes)) )
3647 if ( ! Number || (p = (
void *) calloc(Number, Size)) )
3671 Boolean Quit=
false, WarningOnly=
false;
3672 char Buffer[10000], *Msg=Buffer;
3675 if ( ErrNo ==
NOMEM )
3677 MessageBox(NULL,
"Cannot allocate sufficient memory",
"Fatal Error",
3678 MB_ICONERROR | MB_OK);
3686 sprintf(Msg,
"File %s is incompatible with .names file\n(%s `%s')",
3688 MessageBox(NULL, Msg,
"Cannot Load Classifier",
3689 MB_ICONERROR | MB_OK);
3696 if (
Of ) fprintf(
Of,
"\n");
3700 sprintf(Msg,
"*** ");
3706 Msg += strlen(Buffer);
3846 sprintf(Msg,
" (%s `%s')\n", S1, S2);
3854 fprintf(
Of, Buffer);
3859 MessageBox(NULL, Buffer, ( WarningOnly ?
"Warning" :
"Error" ), MB_OK);
3862 fprintf(
Of, Buffer);
3865 if ( ! WarningOnly )
ErrMsgs++;
3869 #if defined WIN32 && ! defined _CONSOLE
3870 MessageBox(NULL,
T_ErrorLimit,
"Too many errors!", MB_OK);
3895 double RoundErr, Accuracy;
3898 Accuracy = fabs(Val) * 1E-6;
3899 Val = modf(Val, &RoundErr);
3901 for ( Mult = 100000 ; Mult >= 1 ; Mult /= 10 )
3903 RoundErr = fabs(rint(Val * Mult) / Mult - Val);
3904 if ( RoundErr > 2 * Accuracy )
3930 if ( ! isdigit(*S) )
return 0;
3932 Result = Result * 10 + (*S++ -
'0');
3943 int Year, Month, Day;
3945 if ( strlen(DS) != 10 )
return 0;
3951 if ( ! ( DS[4] ==
'/' && DS[7] ==
'/' || DS[4] ==
'-' && DS[7] ==
'-' ) ||
3952 Year < 0 || Month < 1 || Day < 1 ||
3956 ( Month == 4 || Month == 6 || Month == 9 || Month == 11 ) ||
3959 Day > 28 && ( Year % 4 != 0 ||
3960 Year % 100 == 0 && Year % 400 != 0 ) ) )
3965 if ( (Month -= 2) <= 0 )
3971 return Year * 365 + Year / 4 - Year / 100 + Year / 400
3981 int Hour, Mins, Secs;
3983 if ( strlen(TS) != 8 )
return -1;
3989 if ( TS[2] !=
':' || TS[5] !=
':' ||
3990 Hour >= 24 || Mins >= 60 || Secs >= 60 )
3995 return Hour * 3600 + Mins * 60 + Secs;
4004 TSBase = y * 365 + y / 4 - y / 100 + y / 400 + (367 * 4) / 12 + 1 - 30;
4016 if ( strlen(TS) < 19 || !
Space(TS[10]) )
return (1 << 30);
4026 for ( i = 11 ; TS[i] &&
Space(TS[i]) ; i++ )
4035 return ( Day < 1 || Sec < 0 ? (1 << 30) :
4036 (Day -
TSBase) * 1440 + (Sec + 30) / 60 );
4274 while ( First <= Last )