mageec  0.1.0
MAchine Guided Energy Efficient Compilation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
classify-hooks.c
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Source code for use with See5/C5.0 Release 2.10 */
4 /* ----------------------------------------------- */
5 /* Copyright RuleQuest Research 2013 */
6 /* */
7 /* This code is provided "as is" without warranty of any kind, */
8 /* either express or implied. All use is at your own risk. */
9 /* */
10 /*************************************************************************/
11 
12 
13 #define MAXLINEBUFFER 10000
15 
16 
17 /*************************************************************************/
18 /* */
19 /* Read a name from file f into string s, setting Delimiter. */
20 /* */
21 /* - Embedded periods are permitted, but periods followed by space */
22 /* characters act as delimiters. */
23 /* - Embedded spaces are permitted, but multiple spaces are */
24 /* replaced by a single space. */
25 /* - Any character can be escaped by '\'. */
26 /* - The remainder of a line following '|' is ignored. */
27 /* */
28 /*************************************************************************/
29 
30 
31 Boolean ReadName(FILE *f, String s, int n, char ColonOpt)
32 /* -------- */
33 {
34  register char *Sp=s;
35  register int c;
36  char Msg[2];
37 
38  /* Skip to first non-space character */
39 
40  while ( (c = InChar(f)) == '|' || Space(c) )
41  {
42  if ( c == '|' ) SkipComment;
43  }
44 
45  /* Return false if no names to read */
46 
47  if ( c == EOF )
48  {
49  Delimiter = EOF;
50  return false;
51  }
52 
53  /* Read in characters up to the next delimiter */
54 
55  while ( c != ColonOpt && c != ',' && c != '\n' && c != '|' && c != EOF )
56  {
57  if ( --n <= 0 )
58  {
59  if ( Of ) Error(LONGNAME, "", "");
60  }
61 
62  if ( c == '.' )
63  {
64  if ( (c = InChar(f)) == '|' || Space(c) || c == EOF ) break;
65  *Sp++ = '.';
66  continue;
67  }
68 
69  if ( c == '\\' )
70  {
71  c = InChar(f);
72  }
73 
74  if ( Space(c) )
75  {
76  *Sp++ = ' ';
77 
78  while ( ( c = InChar(f) ) == ' ' || c == '\t' )
79  ;
80  }
81  else
82  {
83  *Sp++ = c;
84  c = InChar(f);
85  }
86  }
87 
88  if ( c == '|' ) SkipComment;
89  Delimiter = c;
90 
91  /* Special case for ':=' */
92 
93  if ( Delimiter == ':' )
94  {
95  if ( *LBp == '=' )
96  {
97  Delimiter = '=';
98  LBp++;
99  }
100  }
101 
102  /* Strip trailing spaces */
103 
104  while ( Sp > s && Space(*(Sp-1)) ) Sp--;
105 
106  if ( Sp == s )
107  {
108  Msg[0] = ( Space(c) ? '.' : c );
109  Msg[1] = '\00';
110  Error(MISSNAME, Fn, Msg);
111  }
112 
113  *Sp++ = '\0';
114  return true;
115 }
116 
117 
118 
119 /*************************************************************************/
120 /* */
121 /* Read names of classes, attributes and legal attribute values. */
122 /* On completion, names are stored in: */
123 /* ClassName - class names */
124 /* AttName - attribute names */
125 /* AttValName - attribute value names */
126 /* with: */
127 /* MaxAttVal - number of values for each attribute */
128 /* */
129 /* Other global variables set are: */
130 /* MaxAtt - maximum attribute number */
131 /* MaxClass - maximum class number */
132 /* */
133 /* Note: until the number of attributes is known, the name */
134 /* information is assembled in local arrays */
135 /* */
136 /*************************************************************************/
137 
138 
139 void GetNames(FILE *Nf)
140 /* -------- */
141 {
142  char Buffer[1000]="", *EndBuff;
143  int AttCeiling=100, ClassCeiling=100;
144  Attribute Att;
145  ClassNo c;
146 
147  ErrMsgs = AttExIn = 0;
148  LineNo = 0;
149  LBp = LineBuffer;
150  *LBp = 0;
151 
152  MaxClass = ClassAtt = LabelAtt = CWtAtt = 0;
153 
154  /* Get class names from names file. This entry can be:
155  - a list of discrete values separated by commas
156  - the name of the discrete attribute to use as the class
157  - the name of a continuous attribute followed by a colon and
158  a comma-separated list of thresholds used to segment it */
159 
160  ClassName = AllocZero(ClassCeiling, String);
161  do
162  {
163  ReadName(Nf, Buffer, 1000, ':');
164 
165  if ( ++MaxClass >= ClassCeiling)
166  {
167  ClassCeiling += 100;
168  Realloc(ClassName, ClassCeiling, String);
169  }
170  ClassName[MaxClass] = strdup(Buffer);
171  }
172  while ( Delimiter == ',' );
173 
174  if ( Delimiter == ':' )
175  {
176  /* Thresholds for continuous class attribute */
177 
178  ClassThresh = Alloc(ClassCeiling, ContValue);
179  MaxClass = 0;
180 
181  do
182  {
183  ReadName(Nf, Buffer, 1000, ':');
184 
185  if ( ++MaxClass >= ClassCeiling)
186  {
187  ClassCeiling += 100;
188  Realloc(ClassThresh, ClassCeiling, ContValue);
189  }
190 
191  ClassThresh[MaxClass] = strtod(Buffer, &EndBuff);
192  if ( EndBuff == Buffer || *EndBuff != '\0' )
193  {
194  Error(BADCLASSTHRESH, Buffer, Nil);
195  }
196  else
197  if ( MaxClass > 1 &&
199  {
200  Error(LEQCLASSTHRESH, Buffer, Nil);
201  }
202  }
203  while ( Delimiter == ',' );
204  }
205 
206  /* Get attribute and attribute value names from names file */
207 
208  AttName = AllocZero(AttCeiling, String);
209  MaxAttVal = AllocZero(AttCeiling, DiscrValue);
210  AttValName = AllocZero(AttCeiling, String *);
211  SpecialStatus = AllocZero(AttCeiling, char);
212  AttDef = AllocZero(AttCeiling, Definition);
213 
214  MaxAtt = 0;
215  while ( ReadName(Nf, Buffer, 1000, ':') )
216  {
217  if ( Delimiter != ':' && Delimiter != '=' )
218  {
219  Error(BADATTNAME, Buffer, "");
220  }
221 
222  /* Check for attributes included/excluded */
223 
224  if ( ( *Buffer == 'a' || *Buffer == 'A' ) &&
225  ! memcmp(Buffer+1, "ttributes ", 10) &&
226  ! memcmp(Buffer+strlen(Buffer)-6, "cluded", 6) )
227  {
228  AttExIn = ( ! memcmp(Buffer+strlen(Buffer)-8, "in", 2) ? 1 : -1 );
229  if ( AttExIn == 1 )
230  {
231  ForEach(Att, 1, MaxAtt)
232  {
233  SpecialStatus[Att] |= SKIP;
234  }
235  }
236 
237  while ( ReadName(Nf, Buffer, 1000, ':') )
238  {
239  Att = Which(Buffer, AttName, 1, MaxAtt);
240  if ( ! Att )
241  {
242  Error(UNKNOWNATT, Buffer, Nil);
243  }
244  else
245  if ( AttExIn == 1 )
246  {
247  SpecialStatus[Att] -= SKIP;
248  }
249  else
250  {
251  SpecialStatus[Att] |= SKIP;
252  }
253  }
254 
255  break;
256  }
257 
258  if ( Which(Buffer, AttName, 1, MaxAtt) > 0 )
259  {
260  Error(DUPATTNAME, Buffer, Nil);
261  }
262 
263  if ( ++MaxAtt >= AttCeiling )
264  {
265  AttCeiling += 100;
266  Realloc(AttName, AttCeiling, String);
267  Realloc(MaxAttVal, AttCeiling, DiscrValue);
268  Realloc(AttValName, AttCeiling, String *);
269  Realloc(SpecialStatus, AttCeiling, char);
270  Realloc(AttDef, AttCeiling, Definition);
271  }
272 
273  AttName[MaxAtt] = strdup(Buffer);
275  AttDef[MaxAtt] = Nil;
276  MaxAttVal[MaxAtt] = 0;
277 
278  if ( Delimiter == '=' )
279  {
280  if ( MaxClass == 1 && ! strcmp(ClassName[1], AttName[MaxAtt]) )
281  {
282  Error(BADDEF3, Nil, Nil);
283  }
284 
285  ImplicitAtt(Nf);
286  }
287  else
288  {
289  ExplicitAtt(Nf);
290  }
291 
292  /* Check for case weight attribute, which must be type continuous */
293 
294  if ( ! strcmp(AttName[MaxAtt], "case weight") )
295  {
296  CWtAtt = MaxAtt;
297 
298  if ( ! Continuous(CWtAtt) )
299  {
300  Error(CWTATTERR, "", "");
301  }
302  }
303  }
304 
305  /* Check whether class is one of the attributes */
306 
307  if ( MaxClass == 1 || ClassThresh )
308  {
309  /* Class attribute must be present and must be either
310  a discrete attribute or a thresholded continuous attribute */
311 
313 
314  if ( ClassAtt <= 0 || Exclude(ClassAtt) )
315  {
316  Error(NOTARGET, ClassName[1], "");
317  }
318  else
319  if ( ClassThresh &&
320  ( ! Continuous(ClassAtt) ||
322  {
323  Error(BADCTARGET, ClassName[1], "");
324  }
325  else
326  if ( ! ClassThresh &&
328  {
329  Error(BADDTARGET, ClassName[1], "");
330  }
331 
332  Free(ClassName[1]);
333 
334  if ( ! ClassThresh )
335  {
336  Free(ClassName);
339  }
340  else
341  {
342  /* Set up class names as segments of continuous target att */
343 
344  MaxClass++;
346 
347  sprintf(Buffer, "%s <= %g", AttName[ClassAtt], ClassThresh[1]);
348  ClassName[1] = strdup(Buffer);
349 
350  ForEach(c, 2, MaxClass-1)
351  {
352  sprintf(Buffer, "%g < %s <= %g",
353  ClassThresh[c-1], AttName[ClassAtt], ClassThresh[c]);
354  ClassName[c] = strdup(Buffer);
355  }
356 
357  sprintf(Buffer, "%s > %g",
358  AttName[ClassAtt], ClassThresh[MaxClass-1]);
359  ClassName[MaxClass] = strdup(Buffer);
360  }
361  }
362 
363  /* Ignore case weight attribute if it is excluded; otherwise,
364  it cannot be used in models */
365 
366  if ( CWtAtt )
367  {
368  if ( Skip(CWtAtt) )
369  {
370  CWtAtt = 0;
371  }
372  else
373  {
375  }
376  }
377 
378  ClassName[0] = "?";
379 
380  fclose(Nf);
381 
382  if ( ErrMsgs > 0 ) Goodbye(1);
383 }
384 
385 
386 
387 /*************************************************************************/
388 /* */
389 /* Continuous or discrete attribute */
390 /* */
391 /*************************************************************************/
392 
393 
394 void ExplicitAtt(FILE *Nf)
395 /* ----------- */
396 {
397  char Buffer[1000]="", *p;
398  DiscrValue v;
399  int ValCeiling=100, BaseYear;
400  time_t clock;
401 
402  /* Read attribute type or first discrete value */
403 
404  if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) )
405  {
406  Error(EOFINATT, AttName[MaxAtt], "");
407  }
408 
409  MaxAttVal[MaxAtt] = 0;
410 
411  if ( Delimiter != ',' )
412  {
413  /* Typed attribute */
414 
415  if ( ! strcmp(Buffer, "continuous") )
416  {
417  }
418  else
419  if ( ! strcmp(Buffer, "timestamp") )
420  {
422 
423  /* Set the base date if not done already */
424 
425  if ( ! TSBase )
426  {
427  clock = time(0);
428  BaseYear = gmtime(&clock)->tm_year + 1900;
429  SetTSBase(BaseYear);
430  }
431  }
432  else
433  if ( ! strcmp(Buffer, "date") )
434  {
436  }
437  else
438  if ( ! strcmp(Buffer, "time") )
439  {
441  }
442  else
443  if ( ! memcmp(Buffer, "discrete", 8) )
444  {
446 
447  /* Read max values and reserve space */
448 
449  v = atoi(&Buffer[8]);
450  if ( v < 2 )
451  {
453  }
454 
455  AttValName[MaxAtt] = Alloc(v+3, String);
456  AttValName[MaxAtt][0] = (char *) (long) v+1;
457  AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A");
458  }
459  else
460  if ( ! strcmp(Buffer, "ignore") )
461  {
463  }
464  else
465  if ( ! strcmp(Buffer, "label") )
466  {
467  LabelAtt = MaxAtt;
469  }
470  else
471  {
472  /* Cannot have only one discrete value for an attribute */
473 
474  Error(SINGLEATTVAL, AttName[MaxAtt], Buffer);
475  }
476  }
477  else
478  {
479  /* Discrete attribute with explicit values */
480 
481  AttValName[MaxAtt] = AllocZero(ValCeiling, String);
482 
483  /* Add "N/A" unless this attribute is the class */
484 
485  if ( MaxClass > 1 || strcmp(ClassName[1], AttName[MaxAtt]) )
486  {
487  AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A");
488  }
489  else
490  {
491  MaxAttVal[MaxAtt] = 0;
492  }
493 
494  p = Buffer;
495 
496  /* Special check for ordered attribute */
497 
498  if ( ! memcmp(Buffer, "[ordered]", 9) )
499  {
501 
502  for ( p = Buffer+9 ; Space(*p) ; p++ )
503  ;
504  }
505 
506  /* Record first real explicit value */
507 
508  AttValName[MaxAtt][++MaxAttVal[MaxAtt]] = strdup(p);
509 
510  /* Record remaining values */
511 
512  do
513  {
514  if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) )
515  {
516  Error(EOFINATT, AttName[MaxAtt], "");
517  }
518 
519  if ( ++MaxAttVal[MaxAtt] >= ValCeiling )
520  {
521  ValCeiling += 100;
522  Realloc(AttValName[MaxAtt], ValCeiling, String);
523  }
524 
525  AttValName[MaxAtt][MaxAttVal[MaxAtt]] = strdup(Buffer);
526  }
527  while ( Delimiter == ',' );
528 
529  /* Cancel ordered status if <3 real values */
530 
531  if ( Ordered(MaxAtt) && MaxAttVal[MaxAtt] <= 3 )
532  {
533  SpecialStatus[MaxAtt] = 0;
534  }
535  }
536 }
537 
538 
539 
540 /*************************************************************************/
541 /* */
542 /* Locate value Val in List[First] to List[Last] */
543 /* */
544 /*************************************************************************/
545 
546 
547 int Which(String Val, String *List, int First, int Last)
548 /* ----- */
549 {
550  int n=First;
551 
552  while ( n <= Last && strcmp(Val, List[n]) ) n++;
553 
554  return ( n <= Last ? n : First-1 );
555 }
556 
557 
558 
559 /*************************************************************************/
560 /* */
561 /* Read next char keeping track of line numbers */
562 /* */
563 /*************************************************************************/
564 
565 
566 int InChar(FILE *f)
567 /* ------ */
568 {
569  if ( ! *LBp )
570  {
571  LBp = LineBuffer;
572 
573  if ( ! fgets(LineBuffer, MAXLINEBUFFER, f) )
574  {
575  LineBuffer[0] = '\00';
576  return EOF;
577  }
578 
579  LineNo++;
580  }
581 
582  return (int) *LBp++;
583 }
584 
585 
586 
587 /*************************************************************************/
588 /* */
589 /* Read a raw case from file Df. */
590 /* */
591 /* For each attribute, read the attribute value from the file. */
592 /* If it is a discrete valued attribute, find the associated no. */
593 /* of this attribute value (if the value is unknown this is 0). */
594 /* */
595 /* Returns the array of attribute values. */
596 /* */
597 /*************************************************************************/
598 
599 
600 #define XError(a,b,c) Error(a,b,c)
601 
602 
603 DataRec GetDataRec(FILE *Df, Boolean Train)
604 /* ---------- */
605 {
606  Attribute Att;
607  char Name[1000], *EndName;
608  int Dv;
609  DataRec Dummy, DVec;
610  ContValue Cv;
611  Boolean FirstValue=true;
612 
613 
614  if ( ReadName(Df, Name, 1000, '\00') )
615  {
616  Dummy = AllocZero(MaxAtt+2, AttValue);
617  DVec = &Dummy[1];
618  ForEach(Att, 1, MaxAtt)
619  {
620  if ( AttDef[Att] )
621  {
622  DVec[Att] = EvaluateDef(AttDef[Att], DVec);
623 
624  if ( Continuous(Att) )
625  {
626  CheckValue(DVec, Att);
627  }
628 
629  if ( SomeMiss )
630  {
631  SomeMiss[Att] |= Unknown(DVec, Att);
632  SomeNA[Att] |= NotApplic(DVec, Att);
633  }
634 
635  continue;
636  }
637 
638  /* Get the attribute value if don't already have it */
639 
640  if ( ! FirstValue && ! ReadName(Df, Name, 1000, '\00') )
641  {
642  XError(HITEOF, AttName[Att], "");
643  FreeLastCase(DVec);
644  return Nil;
645  }
646  FirstValue = false;
647 
648  if ( Exclude(Att) )
649  {
650  if ( Att == LabelAtt )
651  {
652  /* Record the value as a string */
653 
654  SVal(DVec,Att) = StoreIVal(Name);
655  }
656  }
657  else
658  if ( ! strcmp(Name, "?") )
659  {
660  /* Set marker to indicate missing value */
661 
662  DVal(DVec, Att) = UNKNOWN;
663  if ( SomeMiss ) SomeMiss[Att] = true;
664  }
665  else
666  if ( Att != ClassAtt && ! strcmp(Name, "N/A") )
667  {
668  /* Set marker to indicate not applicable */
669 
670  DVal(DVec, Att) = NA;
671  if ( SomeNA ) SomeNA[Att] = true;
672  }
673  else
674  if ( Discrete(Att) )
675  {
676  /* Discrete attribute */
677 
678  Dv = Which(Name, AttValName[Att], 1, MaxAttVal[Att]);
679  if ( ! Dv )
680  {
681  if ( StatBit(Att, DISCRETE) )
682  {
683  if ( Train )
684  {
685  /* Add value to list */
686 
687  if ( MaxAttVal[Att] >= (long) AttValName[Att][0] )
688  {
689  XError(TOOMANYVALS, AttName[Att],
690  (char *) AttValName[Att][0] - 1);
691  Dv = MaxAttVal[Att];
692  }
693  else
694  {
695  Dv = ++MaxAttVal[Att];
696  AttValName[Att][Dv] = strdup(Name);
697  AttValName[Att][Dv+1] = "<other>"; /* no free */
698  }
699  }
700  else
701  {
702  /* Set value to "<other>" */
703 
704  Dv = MaxAttVal[Att] + 1;
705  }
706  }
707  else
708  {
709  XError(BADATTVAL, AttName[Att], Name);
710  Dv = UNKNOWN;
711  }
712  }
713  DVal(DVec, Att) = Dv;
714  }
715  else
716  {
717  /* Continuous value */
718 
719  if ( TStampVal(Att) )
720  {
721  CVal(DVec, Att) = Cv = TStampToMins(Name);
722  if ( Cv >= 1E9 ) /* long time in future */
723  {
724  XError(BADTSTMP, AttName[Att], Name);
725  DVal(DVec, Att) = UNKNOWN;
726  }
727  }
728  else
729  if ( DateVal(Att) )
730  {
731  CVal(DVec, Att) = Cv = DateToDay(Name);
732  if ( Cv < 1 )
733  {
734  XError(BADDATE, AttName[Att], Name);
735  DVal(DVec, Att) = UNKNOWN;
736  }
737  }
738  else
739  if ( TimeVal(Att) )
740  {
741  CVal(DVec, Att) = Cv = TimeToSecs(Name);
742  if ( Cv < 0 )
743  {
744  XError(BADTIME, AttName[Att], Name);
745  DVal(DVec, Att) = UNKNOWN;
746  }
747  }
748  else
749  {
750  CVal(DVec, Att) = strtod(Name, &EndName);
751  if ( EndName == Name || *EndName != '\0' )
752  {
753  XError(BADATTVAL, AttName[Att], Name);
754  DVal(DVec, Att) = UNKNOWN;
755  }
756  }
757 
758  CheckValue(DVec, Att);
759  }
760  }
761 
762  if ( ClassAtt )
763  {
764  if ( Discrete(ClassAtt) )
765  {
766  Class(DVec) = XDVal(DVec, ClassAtt);
767  }
768  else
769  if ( Unknown(DVec, ClassAtt) || NotApplic(DVec, ClassAtt) )
770  {
771  Class(DVec) = 0;
772  }
773  else
774  {
775  /* Find appropriate segment using class thresholds */
776 
777  Cv = CVal(DVec, ClassAtt);
778 
779  for ( Dv = 1 ; Dv < MaxClass && Cv > ClassThresh[Dv] ; Dv++ )
780  ;
781 
782  Class(DVec) = Dv;
783  }
784  }
785  else
786  {
787  if ( ! ReadName(Df, Name, 1000, '\00') )
788  {
789  XError(HITEOF, Fn, "");
790  FreeLastCase(DVec);
791  return Nil;
792  }
793 
794  Class(DVec) = Dv = Which(Name, ClassName, 1, MaxClass);
795  }
796 
797  return DVec;
798  }
799  else
800  {
801  return Nil;
802  }
803 }
804 
805 
806 
807 /*************************************************************************/
808 /* */
809 /* Store a label or ignored value in IValStore */
810 /* */
811 /*************************************************************************/
812 
813 
815 /* --------- */
816 {
817  int StartIx, Length;
818 
819  if ( (Length=strlen(S) + 1) + IValsOffset > IValsSize )
820  {
821  if ( IgnoredVals )
822  {
823  Realloc(IgnoredVals, IValsSize += 32768, char);
824  }
825  else
826  {
827  IValsSize = 32768;
828  IValsOffset = 0;
829  IgnoredVals = Alloc(IValsSize, char);
830  }
831  }
832 
833  StartIx = IValsOffset;
834  strcpy(IgnoredVals + StartIx, S);
835  IValsOffset += Length;
836 
837  return StartIx;
838 }
839 
840 
841 
842 /*************************************************************************/
843 /* */
844 /* Check for bad continuous value */
845 /* */
846 /*************************************************************************/
847 
848 
849 void CheckValue(DataRec DVec, Attribute Att)
850 /* ---------- */
851 {
852  ContValue Cv;
853 
854  Cv = CVal(DVec, Att);
855  if ( ! finite(Cv) )
856  {
857  Error(BADNUMBER, AttName[Att], "");
858 
859  CVal(DVec, Att) = UNKNOWN;
860  }
861 }
862 
863 
864 
865 /*************************************************************************/
866 /* */
867 /* Routines to handle implicitly-defined attributes */
868 /* */
869 /*************************************************************************/
870 
871 
872 char *Buff; /* buffer for input characters */
873 int BuffSize, BN; /* size and index of next character */
874 
875 EltRec *TStack; /* expression stack model */
876 int TStackSize, TSN; /* size of stack and index of next entry */
877 
878 int DefSize, DN; /* size of definition and next element */
879 
880 Boolean PreviousError; /* to avoid parasytic errors */
881 
882 AttValue _UNK, /* quasi-constant for unknown value */
883  _NA; /* ditto for not applicable */
884 
885 
886 #define FailSyn(Msg) {DefSyntaxError(Msg); return false;}
887 #define FailSem(Msg) {DefSemanticsError(Fi, Msg, OpCode); return false;}
888 
889 typedef union _xstack_elt
890  {
894  }
895  XStackElt;
896 
897 #define cval _cont_val
898 #define sval _string_val
899 #define dval _discr_val
900 
901 
902 
903 /*************************************************************************/
904 /* */
905 /* A definition is handled in two stages: */
906 /* - The definition is read (up to a line ending with a period) */
907 /* replacing multiple whitespace characters with one space */
908 /* - The definition is then read (using a recursive descent */
909 /* parser), building up a reverse polish expression */
910 /* Syntax and semantics errors are flagged */
911 /* */
912 /*************************************************************************/
913 
914 
915 void ImplicitAtt(FILE *Nf)
916 /* ----------- */
917 {
918 #ifdef CUBIST
919  _UNK.cval = UNKNOWN;
920 #else
921  _UNK.dval = UNKNOWN;
922 #endif
923  _NA.dval = NA;
924 
925  /* Get definition as a string in Buff */
926 
927  ReadDefinition(Nf);
928 
929  PreviousError = false;
930  BN = 0;
931 
932  /* Allocate initial stack and attribute definition */
933 
934  TStack = Alloc(TStackSize=50, EltRec);
935  TSN = 0;
936 
937  AttDef[MaxAtt] = Alloc(DefSize = 100, DefElt);
938  DN = 0;
939 
940  /* Parse Buff as an expression terminated by a period */
941 
942  Expression();
943  if ( ! Find(".") ) DefSyntaxError("'.' ending definition");
944 
945  /* Final check -- defined attribute must not be of type String */
946 
947  if ( ! PreviousError )
948  {
949  if ( DN == 1 && DefOp(AttDef[MaxAtt][0]) == OP_ATT &&
950  strcmp(AttName[MaxAtt], "case weight") )
951  {
952  Error(SAMEATT, AttName[ (long) DefSVal(AttDef[MaxAtt][0]) ], Nil);
953  }
954 
955  if ( TStack[0].Type == 'B' )
956  {
957  /* Defined attributes should never have a value N/A */
958 
959  MaxAttVal[MaxAtt] = 3;
961  AttValName[MaxAtt][1] = strdup("??");
962  AttValName[MaxAtt][2] = strdup("t");
963  AttValName[MaxAtt][3] = strdup("f");
964  }
965  else
966  {
967  MaxAttVal[MaxAtt] = 0;
968  }
969  }
970 
971  if ( PreviousError )
972  {
973  DN = 0;
975  }
976 
977  /* Write a terminating marker */
978 
979  DefOp(AttDef[MaxAtt][DN]) = OP_END;
980 
981  Free(Buff);
982  Free(TStack);
983 }
984 
985 
986 
987 /*************************************************************************/
988 /* */
989 /* Read the text of a definition. Skip comments, collapse */
990 /* multiple whitespace characters. */
991 /* */
992 /*************************************************************************/
993 
994 
995 void ReadDefinition(FILE *f)
996 /* -------------- */
997 {
998  Boolean LastWasPeriod=false;
999  char c;
1000 
1001  Buff = Alloc(BuffSize=50, char);
1002  BN = 0;
1003 
1004  while ( true )
1005  {
1006  c = InChar(f);
1007 
1008  if ( c == '|' ) SkipComment;
1009 
1010  if ( c == EOF || c == '\n' && LastWasPeriod )
1011  {
1012  /* The definition is complete. Add a period if it's
1013  not there already and terminate the string */
1014 
1015  if ( ! LastWasPeriod ) Append('.');
1016  Append(0);
1017 
1018  return;
1019  }
1020 
1021  if ( Space(c) )
1022  {
1023  Append(' ');
1024  }
1025  else
1026  if ( c == '\\' )
1027  {
1028  /* Escaped character -- bypass any special meaning */
1029 
1030  Append(InChar(f));
1031  }
1032  else
1033  {
1034  LastWasPeriod = ( c == '.' );
1035  Append(c);
1036  }
1037  }
1038 }
1039 
1040 
1041 
1042 /*************************************************************************/
1043 /* */
1044 /* Append a character to Buff, resizing it if necessary */
1045 /* */
1046 /*************************************************************************/
1047 
1048 
1049 void Append(char c)
1050 /* ------ */
1051 {
1052  if ( c == ' ' && (! BN || Buff[BN-1] == ' ' ) ) return;
1053 
1054  if ( BN >= BuffSize )
1055  {
1056  Realloc(Buff, BuffSize += 50, char);
1057  }
1058 
1059  Buff[BN++] = c;
1060 }
1061 
1062 
1063 
1064 /*************************************************************************/
1065 /* */
1066 /* Recursive descent parser with syntax error checking. */
1067 /* The reverse polish is built up by calls to Dump() and DumpOp(), */
1068 /* which also check for semantic validity. */
1069 /* */
1070 /* For possible error messages, each routine also keeps track of */
1071 /* the beginning of the construct that it recognises (in Fi). */
1072 /* */
1073 /*************************************************************************/
1074 
1075 
1077 /* ---------- */
1078 {
1079  int Fi=BN;
1080 
1081  if ( Buff[BN] == ' ' ) BN++;
1082 
1083  if ( ! Conjunct() ) FailSyn("expression");
1084 
1085  while ( Find("or") )
1086  {
1087  BN += 2;
1088 
1089  if ( ! Conjunct() ) FailSyn("expression");
1090 
1091  DumpOp(OP_OR, Fi);
1092  }
1093 
1094  return true;
1095 }
1096 
1097 
1098 
1100 /* -------- */
1101 {
1102  int Fi=BN;
1103 
1104  if ( ! SExpression() ) FailSyn("expression");
1105 
1106  while ( Find("and") )
1107  {
1108  BN += 3;
1109 
1110  if ( ! SExpression() ) FailSyn("expression");
1111 
1112  DumpOp(OP_AND, Fi);
1113  }
1114 
1115  return true;
1116 }
1117 
1118 
1119 
1120 String RelOps[] = {">=", "<=", "!=", "<>", ">", "<", "=", (String) 0};
1121 
1123 /* ----------- */
1124 {
1125  int o, Fi=BN;
1126 
1127  if ( ! AExpression() ) FailSyn("expression");
1128 
1129  if ( (o = FindOne(RelOps)) >= 0 )
1130  {
1131  BN += strlen(RelOps[o]);
1132 
1133  if ( ! AExpression() ) FailSyn("expression");
1134 
1135  DumpOp(( o == 0 ? OP_GE :
1136  o == 1 ? OP_LE :
1137  o == 4 ? OP_GT :
1138  o == 5 ? OP_LT :
1139  o == 2 || o == 3 ?
1140  ( TStack[TSN-1].Type == 'S' ? OP_SNE : OP_NE ) :
1141  ( TStack[TSN-1].Type == 'S' ? OP_SEQ : OP_EQ ) ), Fi);
1142  }
1143 
1144  return true;
1145 }
1146 
1147 
1148 
1149 String AddOps[] = {"+", "-", (String) 0};
1150 
1152 /* ----------- */
1153 {
1154  int o, Fi=BN;
1155 
1156  if ( Buff[BN] == ' ' ) BN++;
1157 
1158  if ( (o = FindOne(AddOps)) >= 0 )
1159  {
1160  BN += 1;
1161  }
1162 
1163  if ( ! Term() ) FailSyn("expression");
1164 
1165  if ( o == 1 ) DumpOp(OP_UMINUS, Fi);
1166 
1167  while ( (o = FindOne(AddOps)) >= 0 )
1168  {
1169  BN += 1;
1170 
1171  if ( ! Term() ) FailSyn("arithmetic expression");
1172 
1173  DumpOp((char)(OP_PLUS + o), Fi);
1174  }
1175 
1176  return true;
1177 }
1178 
1179 
1180 
1181 String MultOps[] = {"*", "/", "%", (String) 0};
1182 
1184 /* ---- */
1185 {
1186  int o, Fi=BN;
1187 
1188  if ( ! Factor() ) FailSyn("expression");
1189 
1190  while ( (o = FindOne(MultOps)) >= 0 )
1191  {
1192  BN += 1;
1193 
1194  if ( ! Factor() ) FailSyn("arithmetic expression");
1195 
1196  DumpOp((char)(OP_MULT + o), Fi);
1197  }
1198 
1199  return true;
1200 }
1201 
1202 
1203 
1205 /* ---- */
1206 {
1207  int Fi=BN;
1208 
1209  if ( ! Primary() ) FailSyn("value");
1210 
1211  while ( Find("^") )
1212  {
1213  BN += 1;
1214 
1215  if ( ! Primary() ) FailSyn("exponent");
1216 
1217  DumpOp(OP_POW, Fi);
1218  }
1219 
1220  return true;
1221 }
1222 
1223 
1224 
1226 /* ------- */
1227 {
1228  if ( Atom() )
1229  {
1230  return true;
1231  }
1232  else
1233  if ( Find("(") )
1234  {
1235  BN++;
1236  if ( ! Expression() ) FailSyn("expression in parentheses");
1237  if ( ! Find(")") ) FailSyn("')'");
1238  BN++;
1239  return true;
1240  }
1241  else
1242  {
1243  FailSyn("attribute, value, or '('");
1244  }
1245 }
1246 
1247 
1248 
1249 String Funcs[] = {"sin", "cos", "tan", "log", "exp", "int", (String) 0};
1250 
1252 /* ---- */
1253 {
1254  char *EndPtr, *Str, Date[11], Time[9];
1255  int o, FirstBN, Fi=BN;
1256  ContValue F;
1257  Attribute Att;
1258 
1259  if ( Buff[BN] == ' ' ) BN++;
1260 
1261  if ( Buff[BN] == '"' )
1262  {
1263  FirstBN = ++BN;
1264  while ( Buff[BN] != '"' )
1265  {
1266  if ( ! Buff[BN] ) FailSyn("closing '\"'");
1267  BN++;
1268  }
1269 
1270  /* Make a copy of the string without double quotes */
1271 
1272  Buff[BN] = '\00';
1273  Str = strdup(Buff + FirstBN);
1274 
1275  Buff[BN++] = '"';
1276  Dump(OP_STR, 0, Str, Fi);
1277  }
1278  else
1279  if ( (Att = FindAttName()) )
1280  {
1281  BN += strlen(AttName[Att]);
1282 
1283  Dump(OP_ATT, 0, (String) (long) Att, Fi);
1284  }
1285  else
1286  if ( isdigit(Buff[BN]) )
1287  {
1288  /* Check for date or time first */
1289 
1290  if ( ( Buff[BN+4] == '/' && Buff[BN+7] == '/' ||
1291  Buff[BN+4] == '-' && Buff[BN+7] == '-' )&&
1292  isdigit(Buff[BN+1]) && isdigit(Buff[BN+2]) &&
1293  isdigit(Buff[BN+3]) &&
1294  isdigit(Buff[BN+5]) && isdigit(Buff[BN+6]) &&
1295  isdigit(Buff[BN+8]) && isdigit(Buff[BN+9]) )
1296  {
1297  memcpy(Date, Buff+BN, 10);
1298  Date[10] = '\00';
1299  if ( (F = DateToDay(Date)) == 0 )
1300  {
1301  Error(BADDEF1, Date, "date");
1302  }
1303 
1304  BN += 10;
1305  }
1306  else
1307  if ( Buff[BN+2] == ':' && Buff[BN+5] == ':' &&
1308  isdigit(Buff[BN+1]) &&
1309  isdigit(Buff[BN+3]) && isdigit(Buff[BN+4]) &&
1310  isdigit(Buff[BN+6]) && isdigit(Buff[BN+7]) )
1311  {
1312  memcpy(Time, Buff+BN, 8);
1313  Time[8] = '\00';
1314  if ( (F = TimeToSecs(Time)) == 0 )
1315  {
1316  Error(BADDEF1, Time, "time");
1317  }
1318 
1319  BN += 8;
1320  }
1321  else
1322  {
1323  F = strtod(Buff+BN, &EndPtr);
1324 
1325  /* Check for period after integer */
1326 
1327  if ( EndPtr > Buff+BN+1 && *(EndPtr-1) == '.' )
1328  {
1329  EndPtr--;
1330  }
1331 
1332  BN = EndPtr - Buff;
1333  }
1334 
1335  Dump(OP_NUM, F, Nil, Fi);
1336  }
1337  else
1338  if ( (o = FindOne(Funcs)) >= 0 )
1339  {
1340  BN += 3;
1341 
1342  if ( ! Find("(") ) FailSyn("'(' after function name");
1343  BN++;
1344 
1345  if ( ! Expression() ) FailSyn("expression");
1346 
1347  if ( ! Find(")") ) FailSyn("')' after function argument");
1348  BN++;
1349 
1350  DumpOp((char)(OP_SIN + o), Fi);
1351  }
1352  else
1353  if ( Buff[BN] == '?' )
1354  {
1355  BN++;
1356  if ( TStack[TSN-1].Type == 'N' )
1357  {
1358  Dump(OP_NUM, _UNK.cval, Nil, Fi);
1359  }
1360  else
1361  {
1362  Dump(OP_STR, 0, Nil, Fi);
1363  }
1364  }
1365  else
1366  if ( ! memcmp(Buff+BN, "N/A", 3) )
1367  {
1368  BN += 3;
1369  if ( TStack[TSN-1].Type == 'N' )
1370  {
1371  Dump(OP_NUM, _NA.cval, Nil, Fi);
1372  }
1373  else
1374  {
1375  Dump(OP_STR, 0, strdup("N/A"), Fi);
1376  }
1377  }
1378  else
1379  {
1380  return false;
1381  }
1382 
1383  return true;
1384 }
1385 
1386 
1387 
1388 /*************************************************************************/
1389 /* */
1390 /* Skip spaces and check for specific string */
1391 /* */
1392 /*************************************************************************/
1393 
1394 
1396 /* ---- */
1397 {
1398  if ( Buff[BN] == ' ' ) BN++;
1399 
1400  return ( ! Buff[BN] ? false : ! memcmp(Buff+BN, S, strlen(S)) );
1401 }
1402 
1403 
1404 
1405 /*************************************************************************/
1406 /* */
1407 /* Find one of a zero-terminated list of alternatives */
1408 /* */
1409 /*************************************************************************/
1410 
1411 
1412 int FindOne(String *Alt)
1413 /* ------- */
1414 {
1415  int a;
1416 
1417  for ( a = 0 ; Alt[a] ; a++ )
1418  {
1419  if ( Find(Alt[a]) ) return a;
1420  }
1421 
1422  return -1;
1423 }
1424 
1425 
1426 
1427 /*************************************************************************/
1428 /* */
1429 /* Find an attribute name */
1430 /* */
1431 /*************************************************************************/
1432 
1433 
1435 /* ----------- */
1436 {
1437  Attribute Att, LongestAtt=0;
1438 
1439  ForEach(Att, 1, MaxAtt-1)
1440  {
1441  if ( ! Exclude(Att) && Find(AttName[Att]) )
1442  {
1443  if ( ! LongestAtt ||
1444  strlen(AttName[Att]) > strlen(AttName[LongestAtt]) )
1445  {
1446  LongestAtt = Att;
1447  }
1448  }
1449  }
1450 
1451  if ( LongestAtt && ( MaxClass == 1 || ClassThresh ) &&
1452  ! strcmp(ClassName[1], AttName[LongestAtt]) )
1453  {
1454  Error(BADDEF4, Nil, Nil);
1455  }
1456 
1457  return LongestAtt;
1458 }
1459 
1460 
1461 
1462 /*************************************************************************/
1463 /* */
1464 /* Error message routines. Syntax errors come from the */
1465 /* recursive descent parser, semantics errors from the routines */
1466 /* that build up the equivalent polish */
1467 /* */
1468 /*************************************************************************/
1469 
1470 
1472 /* -------------- */
1473 {
1474  String RestOfText;
1475  int i=10;
1476 
1477  if ( ! PreviousError )
1478  {
1479  RestOfText = Buff + BN;
1480 
1481  /* Abbreviate text if longer than 12 characters */
1482 
1483  if ( CharWidth(RestOfText) > 12 )
1484  {
1485 #ifdef UTF8
1486  /* Find beginning of UTF-8 character */
1487 
1488  for ( ; (RestOfText[i] & 0x80) ; i++)
1489  ;
1490 #endif
1491  RestOfText[i] = RestOfText[i+1] = '.';
1492  }
1493 
1494  Error(BADDEF1, RestOfText, Msg);
1495  PreviousError = true;
1496  }
1497 }
1498 
1499 
1500 
1501 void DefSemanticsError(int Fi, String Msg, int OpCode)
1502 /* ----------------- */
1503 {
1504  char Exp[1000], XMsg[1000], Op[1000];
1505 
1506  if ( ! PreviousError )
1507  {
1508  /* Abbreviate the input if necessary */
1509 
1510  if ( BN - Fi > 23 )
1511  {
1512  sprintf(Exp, "%.10s...%.10s", Buff+Fi, Buff+BN-10);
1513  }
1514  else
1515  {
1516  sprintf(Exp, "%.*s", BN - Fi, Buff+Fi);
1517  }
1518 
1519  switch ( OpCode )
1520  {
1521  case OP_AND: sprintf(Op, "%s", "and"); break;
1522  case OP_OR: sprintf(Op, "%s", "or"); break;
1523  case OP_SEQ:
1524  case OP_EQ: sprintf(Op, "%s", "="); break;
1525  case OP_SNE:
1526  case OP_NE: sprintf(Op, "%s", "<>"); break;
1527  case OP_GT: sprintf(Op, "%s", ">"); break;
1528  case OP_GE: sprintf(Op, "%s", ">="); break;
1529  case OP_LT: sprintf(Op, "%s", "<"); break;
1530  case OP_LE: sprintf(Op, "%s", "<="); break;
1531  case OP_PLUS: sprintf(Op, "%s", "+"); break;
1532  case OP_MINUS: sprintf(Op, "%s", "-"); break;
1533  case OP_UMINUS: sprintf(Op, "%s", "unary -"); break;
1534  case OP_MULT: sprintf(Op, "%s", "*"); break;
1535  case OP_DIV: sprintf(Op, "%s", "/"); break;
1536  case OP_MOD: sprintf(Op, "%s", "%"); break;
1537  case OP_POW: sprintf(Op, "%s", "^"); break;
1538  case OP_SIN: sprintf(Op, "%s", "sin"); break;
1539  case OP_COS: sprintf(Op, "%s", "cos"); break;
1540  case OP_TAN: sprintf(Op, "%s", "tan"); break;
1541  case OP_LOG: sprintf(Op, "%s", "log"); break;
1542  case OP_EXP: sprintf(Op, "%s", "exp"); break;
1543  case OP_INT: sprintf(Op, "%s", "int");
1544  }
1545 
1546  sprintf(XMsg, "%s with '%s'", Msg, Op);
1547  Error(BADDEF2, Exp, XMsg);
1548  PreviousError = true;
1549  }
1550 }
1551 
1552 
1553 
1554 /*************************************************************************/
1555 /* */
1556 /* Reverse polish routines. These use a model of the stack */
1557 /* during expression evaluation to detect type conflicts etc */
1558 /* */
1559 /*************************************************************************/
1560 
1561 
1562 
1563 void Dump(char OpCode, ContValue F, String S, int Fi)
1564 /* ---- */
1565 {
1566  if ( Buff[Fi] == ' ' ) Fi++;
1567 
1568  if ( ! UpdateTStack(OpCode, F, S, Fi) ) return;
1569 
1570  /* Make sure enough room for this element */
1571 
1572  if ( DN >= DefSize-1 )
1573  {
1574  Realloc(AttDef[MaxAtt], DefSize += 100, DefElt);
1575  }
1576 
1577  DefOp(AttDef[MaxAtt][DN]) = OpCode;
1578  if ( OpCode == OP_ATT || OpCode == OP_STR )
1579  {
1580  DefSVal(AttDef[MaxAtt][DN]) = S;
1581  }
1582  else
1583  {
1584  DefNVal(AttDef[MaxAtt][DN]) = F;
1585  }
1586 
1587  DN++;
1588 }
1589 
1590 
1591 
1592 void DumpOp(char OpCode, int Fi)
1593 /* ------ */
1594 {
1595  Dump(OpCode, 0, Nil, Fi);
1596 }
1597 
1598 
1599 
1600 Boolean UpdateTStack(char OpCode, ContValue F, String S, int Fi)
1601 /* ------------ */
1602 {
1603  if ( TSN >= TStackSize )
1604  {
1605  Realloc(TStack, TStackSize += 50, EltRec);
1606  }
1607 
1608  switch ( OpCode )
1609  {
1610  case OP_ATT:
1611  TStack[TSN].Type = ( Continuous((long) S) ? 'N' : 'S' );
1612  break;
1613 
1614  case OP_NUM:
1615  TStack[TSN].Type = 'N';
1616  break;
1617 
1618  case OP_STR:
1619  TStack[TSN].Type = 'S';
1620  break;
1621 
1622  case OP_AND:
1623  case OP_OR:
1624  if ( TStack[TSN-2].Type != 'B' || TStack[TSN-1].Type != 'B' )
1625  {
1626  FailSem("non-logical value");
1627  }
1628  TSN -= 2;
1629  break;
1630 
1631  case OP_EQ:
1632  case OP_NE:
1633  if ( TStack[TSN-2].Type != TStack[TSN-1].Type )
1634  {
1635  FailSem("incompatible values");
1636  }
1637  TSN -= 2;
1638  TStack[TSN].Type = 'B';
1639  break;
1640 
1641  case OP_GT:
1642  case OP_GE:
1643  case OP_LT:
1644  case OP_LE:
1645  if ( TStack[TSN-2].Type != 'N' || TStack[TSN-1].Type != 'N' )
1646  {
1647  FailSem("non-arithmetic value");
1648  }
1649  TSN -= 2;
1650  TStack[TSN].Type = 'B';
1651  break;
1652 
1653  case OP_SEQ:
1654  case OP_SNE:
1655  if ( TStack[TSN-2].Type != 'S' || TStack[TSN-1].Type != 'S' )
1656  {
1657  FailSem("incompatible values");
1658  }
1659  TSN -= 2;
1660  TStack[TSN].Type = 'B';
1661  break;
1662 
1663  case OP_PLUS:
1664  case OP_MINUS:
1665  case OP_MULT:
1666  case OP_DIV:
1667  case OP_MOD:
1668  case OP_POW:
1669  if ( TStack[TSN-2].Type != 'N' || TStack[TSN-1].Type != 'N' )
1670  {
1671  FailSem("non-arithmetic value");
1672  }
1673  TSN -= 2;
1674  break;
1675 
1676  case OP_UMINUS:
1677  if ( TStack[TSN-1].Type != 'N' )
1678  {
1679  FailSem("non-arithmetic value");
1680  }
1681  TSN--;
1682  break;
1683 
1684  case OP_SIN:
1685  case OP_COS:
1686  case OP_TAN:
1687  case OP_LOG:
1688  case OP_EXP:
1689  case OP_INT:
1690  if ( TStack[TSN-1].Type != 'N' )
1691  {
1692  FailSem("non-arithmetic argument");
1693  }
1694  TSN--;
1695  }
1696 
1697  TStack[TSN].Fi = Fi;
1698  TStack[TSN].Li = BN-1;
1699  TSN++;
1700 
1701  return true;
1702 }
1703 
1704 
1705 
1706 /*************************************************************************/
1707 /* */
1708 /* Evaluate an implicit attribute for a case */
1709 /* */
1710 /*************************************************************************/
1711 
1712 #define CUnknownVal(AV) (AV.cval==_UNK.cval)
1713 #define DUnknownVal(AV) (AV.dval==_UNK.dval)
1714 #define DUNA(a) (DUnknownVal(XStack[a]) || NotApplicVal(XStack[a]))
1715 #define CUNA(a) (CUnknownVal(XStack[a]) || NotApplicVal(XStack[a]))
1716 #define C1(x) (CUNA(XSN-1) ? _UNK.cval : (x))
1717 #define C2(x) (CUNA(XSN-1) || CUNA(XSN-2) ? _UNK.cval : (x))
1718 #define CD2(x) (CUNA(XSN-1) || CUNA(XSN-2) ? _UNK.dval : (x))
1719 #define D2(x) (DUNA(XSN-1) || DUNA(XSN-2) ? _UNK.dval : (x))
1720 
1721 
1723 /* ----------- */
1724 {
1725  XStackElt XStack[100]; /* allows 100-level nesting */
1726  int XSN=0, DN, bv1, bv2, Mult;
1727  double cv1, cv2;
1728  String sv1, sv2;
1729  Attribute Att;
1730  DefElt DElt;
1731  AttValue ReturnVal;
1732 
1733  for ( DN = 0 ; ; DN++)
1734  {
1735  switch ( DefOp((DElt = D[DN])) )
1736  {
1737  case OP_ATT:
1738  Att = (long) DefSVal(DElt);
1739 
1740  if ( Continuous(Att) )
1741  {
1742  XStack[XSN++].cval = CVal(Case, Att);
1743  }
1744  else
1745  {
1746  XStack[XSN++].sval =
1747  ( Unknown(Case, Att) && ! NotApplic(Case, Att) ? 0 :
1748  AttValName[Att][XDVal(Case, Att)] );
1749  }
1750  break;
1751 
1752  case OP_NUM:
1753  XStack[XSN++].cval = DefNVal(DElt);
1754  break;
1755 
1756  case OP_STR:
1757  XStack[XSN++].sval = DefSVal(DElt);
1758  break;
1759 
1760  case OP_AND:
1761  bv1 = XStack[XSN-2].dval;
1762  bv2 = XStack[XSN-1].dval;
1763  XStack[XSN-2].dval = ( bv1 == 3 || bv2 == 3 ? 3 :
1764  D2(bv1 == 2 && bv2 == 2 ? 2 : 3) );
1765  XSN--;
1766  break;
1767 
1768  case OP_OR:
1769  bv1 = XStack[XSN-2].dval;
1770  bv2 = XStack[XSN-1].dval;
1771  XStack[XSN-2].dval = ( bv1 == 2 || bv2 == 2 ? 2 :
1772  D2(bv1 == 2 || bv2 == 2 ? 2 : 3) );
1773  XSN--;
1774  break;
1775 
1776  case OP_EQ:
1777  cv1 = XStack[XSN-2].cval;
1778  cv2 = XStack[XSN-1].cval;
1779  XStack[XSN-2].dval = ( cv1 == cv2 ? 2 : 3 );
1780  XSN--;
1781  break;
1782 
1783  case OP_NE:
1784  cv1 = XStack[XSN-2].cval;
1785  cv2 = XStack[XSN-1].cval;
1786  XStack[XSN-2].dval = ( cv1 != cv2 ? 2 : 3 );
1787  XSN--;
1788  break;
1789 
1790  case OP_GT:
1791  cv1 = XStack[XSN-2].cval;
1792  cv2 = XStack[XSN-1].cval;
1793  XStack[XSN-2].dval = CD2(cv1 > cv2 ? 2 : 3);
1794  XSN--;
1795  break;
1796 
1797  case OP_GE:
1798  cv1 = XStack[XSN-2].cval;
1799  cv2 = XStack[XSN-1].cval;
1800  XStack[XSN-2].dval = CD2(cv1 >= cv2 ? 2 : 3);
1801  XSN--;
1802  break;
1803 
1804  case OP_LT:
1805  cv1 = XStack[XSN-2].cval;
1806  cv2 = XStack[XSN-1].cval;
1807  XStack[XSN-2].dval = CD2(cv1 < cv2 ? 2 : 3);
1808  XSN--;
1809  break;
1810 
1811  case OP_LE:
1812  cv1 = XStack[XSN-2].cval;
1813  cv2 = XStack[XSN-1].cval;
1814  XStack[XSN-2].dval = CD2(cv1 <= cv2 ? 2 : 3);
1815  XSN--;
1816  break;
1817 
1818  case OP_SEQ:
1819  sv1 = XStack[XSN-2].sval;
1820  sv2 = XStack[XSN-1].sval;
1821  XStack[XSN-2].dval =
1822  ( ! sv1 && ! sv2 ? 2 :
1823  ! sv1 || ! sv2 ? 3 :
1824  ! strcmp(sv1, sv2) ? 2 : 3 );
1825  XSN--;
1826  break;
1827 
1828  case OP_SNE:
1829  sv1 = XStack[XSN-2].sval;
1830  sv2 = XStack[XSN-1].sval;
1831  XStack[XSN-2].dval =
1832  ( ! sv1 && ! sv2 ? 3 :
1833  ! sv1 || ! sv2 ? 2 :
1834  strcmp(sv1, sv2) ? 2 : 3 );
1835  XSN--;
1836  break;
1837 
1838  case OP_PLUS:
1839  cv1 = XStack[XSN-2].cval;
1840  cv2 = XStack[XSN-1].cval;
1841  XStack[XSN-2].cval = C2(cv1 + cv2);
1842  XSN--;
1843  break;
1844 
1845  case OP_MINUS:
1846  cv1 = XStack[XSN-2].cval;
1847  cv2 = XStack[XSN-1].cval;
1848  XStack[XSN-2].cval = C2(cv1 - cv2);
1849  XSN--;
1850  break;
1851 
1852  case OP_MULT:
1853  cv1 = XStack[XSN-2].cval;
1854  cv2 = XStack[XSN-1].cval;
1855  XStack[XSN-2].cval = C2(cv1 * cv2);
1856  XSN--;
1857  break;
1858 
1859  case OP_DIV:
1860  /* Note: have to set precision of result */
1861 
1862  cv1 = XStack[XSN-2].cval;
1863  cv2 = XStack[XSN-1].cval;
1864  if ( ! cv2 ||
1865  CUnknownVal(XStack[XSN-2]) ||
1866  CUnknownVal(XStack[XSN-1]) ||
1867  NotApplicVal(XStack[XSN-2]) ||
1868  NotApplicVal(XStack[XSN-1]) )
1869  {
1870  XStack[XSN-2].cval = _UNK.cval;
1871  }
1872  else
1873  {
1874  Mult = Denominator(cv1);
1875  cv1 = cv1 / cv2;
1876  while ( fabs(cv2) > 1 )
1877  {
1878  Mult *= 10;
1879  cv2 /= 10;
1880  }
1881  XStack[XSN-2].cval = rint(cv1 * Mult) / Mult;
1882  }
1883  XSN--;
1884  break;
1885 
1886  case OP_MOD:
1887  cv1 = XStack[XSN-2].cval;
1888  cv2 = XStack[XSN-1].cval;
1889  XStack[XSN-2].cval = C2(fmod(cv1, cv2));
1890  XSN--;
1891  break;
1892 
1893  case OP_POW:
1894  cv1 = XStack[XSN-2].cval;
1895  cv2 = XStack[XSN-1].cval;
1896  XStack[XSN-2].cval =
1897  ( CUNA(XSN-1) || CUNA(XSN-2) ||
1898  ( cv1 < 0 && ceil(cv2) != cv2 ) ? _UNK.cval :
1899  pow(cv1, cv2) );
1900  XSN--;
1901  break;
1902 
1903  case OP_UMINUS:
1904  cv1 = XStack[XSN-1].cval;
1905  XStack[XSN-1].cval = C1(-cv1);
1906  break;
1907 
1908  case OP_SIN:
1909  cv1 = XStack[XSN-1].cval;
1910  XStack[XSN-1].cval = C1(sin(cv1));
1911  break;
1912 
1913  case OP_COS:
1914  cv1 = XStack[XSN-1].cval;
1915  XStack[XSN-1].cval = C1(cos(cv1));
1916  break;
1917 
1918  case OP_TAN:
1919  cv1 = XStack[XSN-1].cval;
1920  XStack[XSN-1].cval = C1(tan(cv1));
1921  break;
1922 
1923  case OP_LOG:
1924  cv1 = XStack[XSN-1].cval;
1925  XStack[XSN-1].cval =
1926  ( CUNA(XSN-1) || cv1 <= 0 ? _UNK.cval : log(cv1) );
1927  break;
1928 
1929  case OP_EXP:
1930  cv1 = XStack[XSN-1].cval;
1931  XStack[XSN-1].cval = C1(exp(cv1));
1932  break;
1933 
1934  case OP_INT:
1935  cv1 = XStack[XSN-1].cval;
1936  XStack[XSN-1].cval = C1(rint(cv1));
1937  break;
1938 
1939  case OP_END:
1940  ReturnVal.cval = XStack[0].cval; /* cval >= dval bytes */
1941  return ReturnVal;
1942  }
1943  }
1944 }
1945 
1946 
1947 
1948 /*************************************************************************/
1949 /* */
1950 /* Routines for reading model files */
1951 /* -------------------------------- */
1952 /* */
1953 /*************************************************************************/
1954 
1955 
1956 int Entry;
1957 
1958 char* Prop[]={"null",
1959  "att",
1960  "class",
1961  "cut",
1962  "conds",
1963  "elts",
1964  "entries",
1965  "forks",
1966  "freq",
1967  "id",
1968  "type",
1969  "low",
1970  "mid",
1971  "high",
1972  "result",
1973  "rules",
1974  "val",
1975  "lift",
1976  "cover",
1977  "ok",
1978  "default",
1979  "costs",
1980  "sample",
1981  "init"
1982  };
1983 
1984 char PropName[20],
1986  *Unquoted;
1988 
1989 #define PROPS 23
1990 
1991 #define ERRORP 0
1992 #define ATTP 1
1993 #define CLASSP 2
1994 #define CUTP 3
1995 #define CONDSP 4
1996 #define ELTSP 5
1997 #define ENTRIESP 6
1998 #define FORKSP 7
1999 #define FREQP 8
2000 #define IDP 9
2001 #define TYPEP 10
2002 #define LOWP 11
2003 #define MIDP 12
2004 #define HIGHP 13
2005 #define RESULTP 14
2006 #define RULESP 15
2007 #define VALP 16
2008 #define LIFTP 17
2009 #define COVERP 18
2010 #define OKP 19
2011 #define DEFAULTP 20
2012 #define COSTSP 21
2013 
2014 
2015 
2016 /*************************************************************************/
2017 /* */
2018 /* Read header information and decide whether model files are */
2019 /* in ASCII or binary format */
2020 /* */
2021 /*************************************************************************/
2022 
2023 
2024 void ReadFilePrefix(String Extension)
2025 /* -------------- */
2026 {
2027 #if defined WIN32 || defined _CONSOLE
2028  if ( ! (TRf = GetFile(Extension, "rb")) ) Error(NOFILE, Fn, "");
2029 #else
2030  if ( ! (TRf = GetFile(Extension, "r")) ) Error(NOFILE, Fn, "");
2031 #endif
2032 
2033  ReadHeader();
2034 }
2035 
2036 
2037 
2038 /*************************************************************************/
2039 /* */
2040 /* Read the header information (id, saved names, models) */
2041 /* */
2042 /*************************************************************************/
2043 
2044 
2046 /* --------- */
2047 {
2048  Attribute Att;
2049  DiscrValue v;
2050  char *p, Dummy;
2051  int Year, Month, Day;
2052  FILE *F;
2053 
2054  while ( true )
2055  {
2056  switch ( ReadProp(&Dummy) )
2057  {
2058  case ERRORP:
2059  return;
2060 
2061  case IDP:
2062  /* Recover year run and set base date for timestamps */
2063 
2064  if ( sscanf(PropVal + strlen(PropVal) - 11,
2065  "%d-%d-%d\"", &Year, &Month, &Day) == 3 )
2066  {
2067  SetTSBase(Year);
2068  }
2069  break;
2070 
2071  case COSTSP:
2072  /* Recover costs file used to generate model */
2073 
2074  if ( (F = GetFile(".costs", "r")) )
2075  {
2076  GetMCosts(F);
2077  }
2078  break;
2079 
2080  case ATTP:
2081  Unquoted = RemoveQuotes(PropVal);
2082  Att = Which(Unquoted, AttName, 1, MaxAtt);
2083  if ( ! Att || Exclude(Att) )
2084  {
2085  Error(MODELFILE, E_MFATT, Unquoted);
2086  }
2087  break;
2088 
2089  case ELTSP:
2090  MaxAttVal[Att] = 1;
2091  AttValName[Att][1] = strdup("N/A");
2092 
2093  for ( p = PropVal ; *p ; )
2094  {
2095  p = RemoveQuotes(p);
2096  v = ++MaxAttVal[Att];
2097  AttValName[Att][v] = strdup(p);
2098 
2099  for ( p += strlen(p) ; *p != '"' ; p++ )
2100  ;
2101  p++;
2102  if ( *p == ',' ) p++;
2103  }
2104  AttValName[Att][MaxAttVal[Att]+1] = "<other>";
2105  break;
2106 
2107  case ENTRIESP:
2108  sscanf(PropVal, "\"%d\"", &TRIALS);
2109  Entry = 0;
2110  return;
2111  }
2112  }
2113 }
2114 
2115 
2116 
2117 /*************************************************************************/
2118 /* */
2119 /* Retrieve decision tree with extension Extension */
2120 /* */
2121 /*************************************************************************/
2122 
2123 
2124 Tree GetTree(String Extension)
2125 /* ------- */
2126 {
2127  CheckFile(Extension, false);
2128 
2129  return InTree();
2130 }
2131 
2132 
2133 
2135 /* ------ */
2136 {
2137  Tree T;
2138  DiscrValue v, Subset=0;
2139  char Delim, *p;
2140  ClassNo c;
2141  int X;
2142  double XD;
2143 
2144  T = (Tree) AllocZero(1, TreeRec);
2145 
2146  do
2147  {
2148  switch ( ReadProp(&Delim) )
2149  {
2150  case ERRORP:
2151  return Nil;
2152 
2153  case TYPEP:
2154  sscanf(PropVal, "\"%d\"", &X); T->NodeType = X;
2155  break;
2156 
2157  case CLASSP:
2158  Unquoted = RemoveQuotes(PropVal);
2159  T->Leaf = Which(Unquoted, ClassName, 1, MaxClass);
2160  if ( ! T->Leaf ) Error(MODELFILE, E_MFCLASS, Unquoted);
2161  break;
2162 
2163  case ATTP:
2164  Unquoted = RemoveQuotes(PropVal);
2165  T->Tested = Which(Unquoted, AttName, 1, MaxAtt);
2166  if ( ! T->Tested || Exclude(T->Tested) )
2167  {
2168  Error(MODELFILE, E_MFATT, Unquoted);
2169  }
2170  break;
2171 
2172  case CUTP:
2173  sscanf(PropVal, "\"%lf\"", &XD); T->Cut = XD;
2174  T->Lower = T->Upper = T->Cut;
2175  break;
2176 
2177  case LOWP:
2178  sscanf(PropVal, "\"%lf\"", &XD); T->Lower = XD;
2179  break;
2180 
2181  case HIGHP:
2182  sscanf(PropVal, "\"%lf\"", &XD); T->Upper = XD;
2183  break;
2184 
2185  case FORKSP:
2186  sscanf(PropVal, "\"%d\"", &T->Forks);
2187  break;
2188 
2189  case FREQP:
2190  T->ClassDist = Alloc(MaxClass+1, CaseCount);
2191  p = PropVal+1;
2192 
2193  ForEach(c, 1, MaxClass)
2194  {
2195  T->ClassDist[c] = strtod(p, &p);
2196  T->Cases += T->ClassDist[c];
2197  p++;
2198  }
2199  break;
2200 
2201  case ELTSP:
2202  if ( ! Subset++ )
2203  {
2204  T->Subset = AllocZero(T->Forks+1, Set);
2205  }
2206 
2207  T->Subset[Subset] = MakeSubset(T->Tested);
2208  break;
2209  }
2210  }
2211  while ( Delim == ' ' );
2212 
2213  if ( T->ClassDist )
2214  {
2215  T->Errors = T->Cases - T->ClassDist[T->Leaf];
2216  }
2217  else
2218  {
2219  T->ClassDist = Alloc(1, CaseCount);
2220  }
2221 
2222  if ( T->NodeType )
2223  {
2224  T->Branch = AllocZero(T->Forks+1, Tree);
2225  ForEach(v, 1, T->Forks)
2226  {
2227  T->Branch[v] = InTree();
2228  }
2229  }
2230 
2231  return T;
2232 }
2233 
2234 
2235 
2236 /*************************************************************************/
2237 /* */
2238 /* Retrieve ruleset with extension Extension */
2239 /* (Separate functions for ruleset, single rule, single condition) */
2240 /* */
2241 /*************************************************************************/
2242 
2243 
2245 /* -------- */
2246 {
2247  CheckFile(Extension, false);
2248 
2249  return InRules();
2250 }
2251 
2252 
2253 
2255 /* ------- */
2256 {
2257  CRuleSet RS;
2258  RuleNo r;
2259  char Delim;
2260 
2261  RS = Alloc(1, RuleSetRec);
2262 
2263  do
2264  {
2265  switch ( ReadProp(&Delim) )
2266  {
2267  case ERRORP:
2268  return Nil;
2269 
2270  case RULESP:
2271  sscanf(PropVal, "\"%d\"", &RS->SNRules);
2273  break;
2274 
2275  case DEFAULTP:
2276  Unquoted = RemoveQuotes(PropVal);
2277  RS->SDefault = Which(Unquoted, ClassName, 1, MaxClass);
2278  if ( ! RS->SDefault ) Error(MODELFILE, E_MFCLASS, Unquoted);
2279  break;
2280  }
2281  }
2282  while ( Delim == ' ' );
2283 
2284  /* Read each rule */
2285 
2286  RS->SRule = Alloc(RS->SNRules+1, CRule);
2287  ForEach(r, 1, RS->SNRules)
2288  {
2289  if ( (RS->SRule[r] = InRule()) )
2290  {
2291  RS->SRule[r]->RNo = r;
2292  RS->SRule[r]->TNo = Entry;
2293  }
2294  }
2295  ConstructRuleTree(RS);
2296  Entry++;
2297  return RS;
2298 }
2299 
2300 
2301 
2303 /* ------ */
2304 {
2305  CRule R;
2306  int d;
2307  char Delim;
2308  float Lift;
2309 
2310  R = Alloc(1, RuleRec);
2311 
2312  do
2313  {
2314  switch ( ReadProp(&Delim) )
2315  {
2316  case ERRORP:
2317  return Nil;
2318 
2319  case CONDSP:
2320  sscanf(PropVal, "\"%d\"", &R->Size);
2321  break;
2322 
2323  case COVERP:
2324  sscanf(PropVal, "\"%f\"", &R->Cover);
2325  break;
2326 
2327  case OKP:
2328  sscanf(PropVal, "\"%f\"", &R->Correct);
2329  break;
2330 
2331  case LIFTP:
2332  sscanf(PropVal, "\"%f\"", &Lift);
2333  R->Prior = (R->Correct + 1) / ((R->Cover + 2) * Lift);
2334  break;
2335 
2336  case CLASSP:
2337  Unquoted = RemoveQuotes(PropVal);
2338  R->Rhs = Which(Unquoted, ClassName, 1, MaxClass);
2339  if ( ! R->Rhs ) Error(MODELFILE, E_MFCLASS, Unquoted);
2340  break;
2341  }
2342  }
2343  while ( Delim == ' ' );
2344 
2345  R->Lhs = Alloc(R->Size+1, Condition);
2346  ForEach(d, 1, R->Size)
2347  {
2348  R->Lhs[d] = InCondition();
2349  }
2350 
2351  R->Vote = 1000 * (R->Correct + 1.0) / (R->Cover + 2.0) + 0.5;
2352 
2353  return R;
2354 }
2355 
2356 
2357 
2359 /* ----------- */
2360 {
2361  Condition C;
2362  char Delim;
2363  int X;
2364  double XD;
2365 
2366  C = Alloc(1, CondRec);
2367 
2368  do
2369  {
2370  switch ( ReadProp(&Delim) )
2371  {
2372  case ERRORP:
2373  return Nil;
2374 
2375  case TYPEP:
2376  sscanf(PropVal, "\"%d\"", &X); C->NodeType = X;
2377  break;
2378 
2379  case ATTP:
2380  Unquoted = RemoveQuotes(PropVal);
2381  C->Tested = Which(Unquoted, AttName, 1, MaxAtt);
2382  if ( ! C->Tested || Exclude(C->Tested) )
2383  {
2384  Error(MODELFILE, E_MFATT, Unquoted);
2385  }
2386  break;
2387 
2388  case CUTP:
2389  sscanf(PropVal, "\"%lf\"", &XD); C->Cut = XD;
2390  break;
2391 
2392  case RESULTP:
2393  C->TestValue = ( PropVal[1] == '<' ? 2 : 3 );
2394  break;
2395 
2396  case VALP:
2397  if ( Continuous(C->Tested) )
2398  {
2399  C->TestValue = 1;
2400  }
2401  else
2402  {
2403  Unquoted = RemoveQuotes(PropVal);
2404  C->TestValue = Which(Unquoted,
2405  AttValName[C->Tested],
2406  1, MaxAttVal[C->Tested]);
2407  if ( ! C->TestValue ) Error(MODELFILE, E_MFATTVAL, Unquoted);
2408  }
2409  break;
2410 
2411  case ELTSP:
2412  C->Subset = MakeSubset(C->Tested);
2413  C->TestValue = 1;
2414  break;
2415  }
2416  }
2417  while ( Delim == ' ' );
2418 
2419  return C;
2420 }
2421 
2422 
2424 int NTest,
2425  TestSpace,
2429 
2430 
2432 /* ----------------- */
2433 {
2434  int r, c;
2435  RuleNo *All;
2436 
2437  Test = Alloc((TestSpace = 1000), Condition);
2438  NTest = 0;
2439 
2440  All = Alloc(RS->SNRules, RuleNo);
2441  ForEach(r, 1, RS->SNRules)
2442  {
2443  All[r-1] = r;
2444 
2445  ForEach(c, 1, RS->SRule[r]->Size)
2446  {
2447  SetTestIndex(RS->SRule[r]->Lhs[c]);
2448  }
2449  }
2450 
2451  TestOccur = Alloc(NTest, int);
2452  TestUsed = AllocZero(NTest, Boolean);
2453 
2454  RuleCondOK = AllocZero(RS->SNRules+1, int);
2455 
2456  RS->RT = GrowRT(All, RS->SNRules, RS->SRule);
2457 
2458  Free(All);
2459  Free(Test);
2460  Free(TestUsed);
2461  Free(TestOccur);
2462  Free(RuleCondOK);
2463 }
2464 
2465 
2466 
2468 /* ------------ */
2469 {
2470  int t;
2471  Condition CC;
2472  Attribute Att;
2473 
2474  Att = C->Tested;
2475 
2476  ForEach(t, 0, NTest-1)
2477  {
2478  CC = Test[t];
2479  if ( CC->Tested != Att || CC->NodeType != C->NodeType ) continue;
2480 
2481  switch ( C->NodeType )
2482  {
2483  case BrDiscr:
2484  C->TestI = t;
2485  return;
2486 
2487  case BrSubset:
2488  if ( ! memcmp(C->Subset, CC->Subset, (MaxAttVal[Att]>>3)+1) )
2489  {
2490  C->TestI = t;
2491  return;
2492  }
2493  break;
2494 
2495  case BrThresh:
2496  if ( C->TestValue == 1 && CC->TestValue == 1 ||
2497  ( C->TestValue != 1 && CC->TestValue != 1 &&
2498  C->Cut == CC->Cut ) )
2499  {
2500  C->TestI = t;
2501  return;
2502  }
2503  break;
2504  }
2505  }
2506 
2507  /* New test -- make sure have enough space */
2508 
2509  if ( NTest >= TestSpace )
2510  {
2511  Realloc(Test, (TestSpace += 1000), Condition);
2512  }
2513 
2514  Test[NTest] = C;
2515  C->TestI = NTest++;
2516 }
2517 
2518 
2519 
2521 /* ------ */
2522 {
2523  RuleTree Node;
2524  RuleNo r, *LR;
2525  int FP=0, ri, TI, *Expect, LRN;
2526  DiscrValue v;
2527 
2528  if ( ! RRN ) return Nil;
2529 
2530  Node = AllocZero(1, RuleTreeRec);
2531 
2532  /* Record and swap to front any rules that are satisfied */
2533 
2534  ForEach(ri, 0, RRN-1)
2535  {
2536  r = RR[ri];
2537 
2538  if ( RuleCondOK[r] == Rule[r]->Size )
2539  {
2540  RR[ri] = RR[FP];
2541  RR[FP] = r;
2542  FP++;
2543  }
2544  }
2545 
2546  if ( FP )
2547  {
2548  Node->Fire = Alloc(FP+1, RuleNo);
2549  memcpy(Node->Fire, RR, FP * sizeof(RuleNo));
2550  Node->Fire[FP] = 0;
2551  RR += FP;
2552  RRN -= FP;
2553  }
2554  else
2555  {
2556  Node->Fire = Nil;
2557  }
2558 
2559  if ( ! RRN ) return Node;
2560 
2561  /* Choose test for this node */
2562 
2563  TI = SelectTest(RR, RRN, Rule);
2564  TestUsed[TI] = true;
2565 
2566  Node->CondTest = Test[TI];
2567 
2568  /* Find the desired outcome for each rule */
2569 
2570  Expect = Alloc(RRN, int);
2571  ForEach(ri, 0, RRN-1)
2572  {
2573  Expect[ri] = DesiredOutcome(Rule[RR[ri]], TI);
2574  }
2575 
2576  /* Now construct individual branches. Rules that do not reference
2577  the selected test go down branch 0; at classification time,
2578  any case with an unknown outcome for the selected test also
2579  goes to branch 0. */
2580 
2581  Node->Forks =
2582  ( Test[TI]->NodeType == BrDiscr ? MaxAttVal[Test[TI]->Tested] :
2583  Test[TI]->NodeType == BrSubset ? 1 : 3 );
2584 
2585  Node->Branch = Alloc(Node->Forks+1, RuleTree);
2586 
2587  LR = Alloc(RRN, RuleNo);
2588  ForEach(v, 0, Node->Forks)
2589  {
2590  /* Extract rules with outcome v and increment conditions satisfied,
2591  if relevant */
2592 
2593  LRN = 0;
2594  ForEach(ri, 0, RRN-1)
2595  {
2596  if ( abs(Expect[ri]) == v )
2597  {
2598  LR[LRN++] = RR[ri];
2599 
2600  if ( Expect[ri] > 0 ) RuleCondOK[RR[ri]]++;
2601  }
2602  }
2603 
2604  /* LR now contains rules with outcome v */
2605 
2606  Node->Branch[v] = GrowRT(LR, LRN, Rule);
2607 
2608  if ( v )
2609  {
2610  /* Restore conditions satisfied */
2611 
2612  ForEach(ri, 0, LRN-1)
2613  {
2614  RuleCondOK[LR[ri]]--;
2615  }
2616  }
2617  }
2618 
2619  TestUsed[TI] = false;
2620 
2621  /* Free local storage */
2622 
2623  Free(LR);
2624  Free(Expect);
2625 
2626  return Node;
2627 }
2628 
2629 
2630 
2632 /* -------------- */
2633 {
2634  int c;
2636 
2637  ContinTest = Continuous(Test[TI]->Tested); /* test of continuous att */
2638 
2639  ForEach(c, 1, R->Size)
2640  {
2641  if ( R->Lhs[c]->TestI == TI )
2642  {
2643  return R->Lhs[c]->TestValue;
2644  }
2645 
2646  /* If this test references the same continuous attribute but
2647  with a different threshold, may be able to exploit outcome:
2648  -2 means "rule can only be matched down branch 2"
2649  -3 means "rule can only be matched down branch 3" */
2650 
2651  if ( ContinTest && Test[TI]->Tested == R->Lhs[c]->Tested )
2652  {
2653  switch ( R->Lhs[c]->TestValue )
2654  {
2655  case 1:
2656  return 1;
2657 
2658  case 2:
2659  if ( R->Lhs[c]->Cut < Test[TI]->Cut ) return -2;
2660  break;
2661 
2662  case 3:
2663  if ( R->Lhs[c]->Cut > Test[TI]->Cut ) return -3;
2664  }
2665  }
2666  }
2667 
2668  return 0;
2669 }
2670 
2671 
2672 
2673 int SelectTest(RuleNo *RR, int RRN, CRule *Rule)
2674 /* ---------- */
2675 {
2676  int c, cc, ri;
2677  RuleNo r;
2678 
2679  /* Count test occurrences */
2680 
2681  ForEach(c, 0, NTest-1)
2682  {
2683  TestOccur[c] = 0;
2684  }
2685 
2686  ForEach(ri, 0, RRN-1)
2687  {
2688  r = RR[ri];
2689 
2690  ForEach(c, 1, Rule[r]->Size)
2691  {
2692  TestOccur[Rule[r]->Lhs[c]->TestI]++;
2693  }
2694  }
2695 
2696  /* Find most frequently-occurring test */
2697 
2698  cc = -1;
2699  ForEach(c, 0, NTest-1)
2700  {
2701  if ( ! TestUsed[c] && ( cc < 0 || TestOccur[c] > TestOccur[cc] ) )
2702  {
2703  cc = c;
2704  }
2705  }
2706 
2707  return cc;
2708 }
2709 
2710 
2711 
2712 /*************************************************************************/
2713 /* */
2714 /* ASCII reading utilities */
2715 /* */
2716 /*************************************************************************/
2717 
2718 
2719 int ReadProp(char *Delim)
2720 /* -------- */
2721 {
2722  int c, i;
2723  char *p;
2724  Boolean Quote=false;
2725 
2726  for ( p = PropName ; (c = fgetc(TRf)) != '=' ; )
2727  {
2728  if ( p - PropName >= 19 || c == EOF )
2729  {
2730  Error(MODELFILE, E_MFEOF, "");
2731  PropName[0] = PropVal[0] = *Delim = '\00';
2732  return 0;
2733  }
2734  *p++ = c;
2735  }
2736  *p = '\00';
2737 
2738  for ( p = PropVal ; ((c = fgetc(TRf)) != ' ' && c != '\n') || Quote ; )
2739  {
2740  if ( c == EOF )
2741  {
2742  Error(MODELFILE, E_MFEOF, "");
2743  PropName[0] = PropVal[0] = '\00';
2744  return 0;
2745  }
2746 
2747  if ( (i = p - PropVal) >= PropValSize )
2748  {
2749  Realloc(PropVal, (PropValSize += 10000) + 3, char);
2750  p = PropVal + i;
2751  }
2752 
2753  *p++ = c;
2754  if ( c == '\\' )
2755  {
2756  *p++ = fgetc(TRf);
2757  }
2758  else
2759  if ( c == '"' )
2760  {
2761  Quote = ! Quote;
2762  }
2763  }
2764  *p = '\00';
2765  *Delim = c;
2766 
2767  return Which(PropName, Prop, 1, PROPS);
2768 }
2769 
2770 
2772 /* ------------ */
2773 {
2774  char *p, *Start;
2775 
2776  p = Start = S;
2777 
2778  for ( S++ ; *S != '"' ; S++ )
2779  {
2780  if ( *S == '\\' ) S++;
2781  *p++ = *S;
2782  *S = '-';
2783  }
2784  *p = '\00';
2785 
2786  return Start;
2787 }
2788 
2789 
2790 
2792 /* ---------- */
2793 {
2794  int Bytes, b;
2795  char *p;
2796  Set S;
2797 
2798  Bytes = (MaxAttVal[Att]>>3) + 1;
2799  S = AllocZero(Bytes, Byte);
2800 
2801  for ( p = PropVal ; *p ; )
2802  {
2803  p = RemoveQuotes(p);
2804  b = Which(p, AttValName[Att], 1, MaxAttVal[Att]);
2805  if ( ! b ) Error(MODELFILE, E_MFATTVAL, p);
2806  SetBit(b, S);
2807 
2808  for ( p += strlen(p) ; *p != '"' ; p++ )
2809  ;
2810  p++;
2811  if ( *p == ',' ) p++;
2812  }
2813 
2814  return S;
2815 }
2816 
2817 
2818 
2819 /*************************************************************************/
2820 /* */
2821 /* Construct a leaf in a given node */
2822 /* */
2823 /*************************************************************************/
2824 
2825 
2826 Tree Leaf(double *Freq, ClassNo NodeClass, CaseCount Cases, CaseCount Errors)
2827 /* ---- */
2828 {
2829  Tree Node;
2830  ClassNo c;
2831 
2832  Node = AllocZero(1, TreeRec);
2833 
2834  Node->ClassDist = AllocZero(MaxClass+1, CaseCount);
2835  if ( Freq )
2836  {
2837  ForEach(c, 1, MaxClass)
2838  {
2839  Node->ClassDist[c] = Freq[c];
2840  }
2841  }
2842 
2843  Node->NodeType = 0;
2844  Node->Leaf = NodeClass;
2845  Node->Cases = Cases;
2846  Node->Errors = Errors;
2847 
2848  return Node;
2849 }
2850 
2851 
2852 /*************************************************************************/
2853 /* */
2854 /* Read variable misclassification costs */
2855 /* */
2856 /*************************************************************************/
2857 
2858 
2859 void GetMCosts(FILE *Cf)
2860 /* --------- */
2861 {
2862  ClassNo Pred, Real, p, r;
2863  char Name[1000];
2864  float Val;
2865 
2866  LineNo = 0;
2867 
2868  /* Read entries from cost file */
2869 
2870  while ( ReadName(Cf, Name, 1000, ':') )
2871  {
2872  if ( ! (Pred = Which(Name, ClassName, 1, MaxClass)) )
2873  {
2874  Error(BADCOSTCLASS, Name, "");
2875  }
2876 
2877  if ( ! ReadName(Cf, Name, 1000, ':') ||
2878  ! (Real = Which(Name, ClassName, 1, MaxClass)) )
2879  {
2880  Error(BADCOSTCLASS, Name, "");
2881  }
2882 
2883  if ( ! ReadName(Cf, Name, 1000, ':') ||
2884  sscanf(Name, "%f", &Val) != 1 || Val < 0 )
2885  {
2886  Error(BADCOST, "", "");
2887  Val = 1;
2888  }
2889 
2890  if ( Pred > 0 && Real > 0 && Pred != Real && Val != 1 )
2891  {
2892  /* Have a non-trivial cost entry */
2893 
2894  if ( ! MCost )
2895  {
2896  /* Set up cost matrices */
2897 
2898  MCost = Alloc(MaxClass+1, float *);
2899  ForEach(p, 1, MaxClass)
2900  {
2901  MCost[p] = Alloc(MaxClass+1, float);
2902  ForEach(r, 1, MaxClass)
2903  {
2904  MCost[p][r] = ( p == r ? 0.0 : 1.0 );
2905  }
2906  }
2907  }
2908 
2909  MCost[Pred][Real] = Val;
2910  }
2911  }
2912  fclose(Cf);
2913 }
2914 
2915 
2916 
2917 /*************************************************************************/
2918 /* */
2919 /* Categorize a case using a decision tree */
2920 /* */
2921 /*************************************************************************/
2922 
2923 
2925 /* ------------ */
2926 {
2927  ClassNo c, C;
2928  double Prior;
2929 
2930  /* Save total leaf count in E->ClassWt[0] */
2931 
2932  ForEach(c, 0, MaxClass)
2933  {
2934  E->ClassWt[c] = 0;
2935  }
2936 
2937  FindLeaf(Case, DecisionTree, Nil, 1.0, E->ClassWt, E->AttUsed);
2938 
2939  C = SelectClass(DecisionTree->Leaf, (Boolean)(MCost != Nil), E->ClassWt);
2940 
2941 #if defined WIN32 || defined PREDICT
2942  ForEach(c, 1, MaxClass)
2943  {
2944  Prior = DecisionTree->ClassDist[c] / DecisionTree->Cases;
2945  E->ClassWt[c] =
2946  (E->ClassWt[0] * E->ClassWt[c] + Prior) / (E->ClassWt[0] + 1);
2947  }
2948  E->Confidence = E->ClassWt[C];
2949 #else
2950  Prior = DecisionTree->ClassDist[C] / DecisionTree->Cases;
2951  E->Confidence =
2952  (E->ClassWt[0] * E->ClassWt[C] + Prior) / (E->ClassWt[0] + 1);
2953 #endif
2954 
2955  return C;
2956 }
2957 
2958 
2959 
2960 /*************************************************************************/
2961 /* */
2962 /* Follow all branches from a node, weighting them in proportion */
2963 /* to the number of training cases they contain */
2964 /* */
2965 /*************************************************************************/
2966 
2967 
2968 
2969 void FollowAllBranches(DataRec Case, Tree T, float Fraction, double *Prob,
2970  Boolean *AttUsed)
2971 /* ----------------- */
2972 {
2973  DiscrValue v;
2974 
2975  ForEach(v, 1, T->Forks)
2976  {
2977  if ( T->Branch[v]->Cases > Epsilon )
2978  {
2979  FindLeaf(Case, T->Branch[v], T,
2980  (Fraction * T->Branch[v]->Cases) / T->Cases,
2981  Prob, AttUsed);
2982  }
2983  }
2984 }
2985 
2986 
2987 
2988 /*************************************************************************/
2989 /* */
2990 /* Classify a case using the given subtree. */
2991 /* */
2992 /*************************************************************************/
2993 
2994 
2995 void FindLeaf(DataRec Case, Tree T, Tree PT, float Fraction, double *Prob,
2996  Boolean *AttUsed)
2997 /* -------- */
2998 {
2999  DiscrValue v, Dv;
3000  ClassNo c;
3001  double NewFrac, BrWt[4];
3002 
3003 
3004  switch ( T->NodeType )
3005  {
3006  case 0: /* leaf */
3007 
3008  LeafUpdate:
3009 
3010  /* Use parent node if effectively no cases at this node */
3011 
3012  if ( T->Cases < Epsilon )
3013  {
3014  T = PT;
3015  }
3016 
3017  /* Update from all classes */
3018 
3019  ForEach(c, 1, MaxClass)
3020  {
3021  Prob[c] += Fraction * T->ClassDist[c] / T->Cases;
3022  }
3023 
3024  Prob[0] += Fraction * T->Cases;
3025 
3026  return;
3027 
3028  case BrDiscr: /* test of discrete attribute */
3029 
3030  Dv = DVal(Case, T->Tested); /* > MaxAttVal if unknown */
3031 
3032  if ( Dv <= T->Forks ) /* Make sure not new discrete value */
3033  {
3034  FindLeaf(Case, T->Branch[Dv], T, Fraction, Prob, AttUsed);
3035  }
3036  else
3037  {
3038  FollowAllBranches(Case, T, Fraction, Prob, AttUsed);
3039  }
3040 
3041  return;
3042 
3043  case BrThresh: /* test of continuous attribute */
3044 
3045  if ( Unknown(Case, T->Tested) )
3046  {
3047  FollowAllBranches(Case, T, Fraction, Prob, AttUsed);
3048  }
3049  else
3050  if ( NotApplic(Case, T->Tested) )
3051  {
3052  FindLeaf(Case, T->Branch[1], T, Fraction, Prob, AttUsed);
3053  }
3054  else
3055  {
3056  /* Find weights for <= and > branches, interpolating if
3057  erobabilistic thresholds are used */
3058 
3059  BrWt[2] = Interpolate(T, CVal(Case, T->Tested));
3060  BrWt[3] = 1 - BrWt[2];
3061 
3062  ForEach(v, 2, 3)
3063  {
3064  if ( (NewFrac = Fraction * BrWt[v]) >= 1E-6 )
3065  {
3066  FindLeaf(Case, T->Branch[v], T, NewFrac, Prob, AttUsed);
3067  }
3068  }
3069  }
3070 
3071  return;
3072 
3073  case BrSubset: /* subset test on discrete attribute */
3074 
3075  Dv = DVal(Case, T->Tested); /* > MaxAttVal if unknown */
3076 
3077  if ( Dv <= MaxAttVal[T->Tested] )
3078  {
3079  ForEach(v, 1, T->Forks)
3080  {
3081  if ( In(Dv, T->Subset[v]) )
3082  {
3083  FindLeaf(Case, T->Branch[v], T, Fraction, Prob, AttUsed);
3084 
3085  return;
3086  }
3087  }
3088 
3089  /* Value not found in any subset -- treat as leaf */
3090 
3091  goto LeafUpdate;
3092  }
3093  else
3094  {
3095  FollowAllBranches(Case, T, Fraction, Prob, AttUsed);
3096  }
3097  }
3098 }
3099 
3100 
3101 
3102 /*************************************************************************/
3103 /* */
3104 /* Categorize a case using a ruleset */
3105 /* */
3106 /*************************************************************************/
3107 
3108 
3110 /* ------------ */
3111 {
3112  ClassNo c, Best;
3113  double TotWeight=0, TotVote=0;
3114  int a;
3115  CRule R;
3116  RuleNo r;
3117 
3118  ForEach(c, 0, MaxClass)
3119  {
3120  E->ClassWt[c] = 0;
3121  E->MostSpec[c] = Nil;
3122  }
3123 
3124  /* Find active rules */
3125 
3126  E->NActive = 0;
3127 
3128  if ( RS->RT )
3129  {
3130  MarkActive(RS->RT, Case, E);
3131  }
3132  else
3133  {
3134  ForEach(r, 1, RS->SNRules)
3135  {
3136  R = RS->SRule[r];
3137 
3138  if ( Matches(R, Case) )
3139  {
3140  E->Active[E->NActive++] = r;
3141  }
3142  }
3143  }
3144 
3145  if ( RULESUSED )
3146  {
3147  E->RulesUsed[E->NRulesUsed++] = E->NActive;
3148  ForEach(a, 0, E->NActive-1)
3149  {
3150  E->RulesUsed[E->NRulesUsed++] = E->Active[a];
3151  }
3152  }
3153 
3154  /* Vote active rules */
3155 
3156  ForEach(a, 0, E->NActive-1)
3157  {
3158  r = E->Active[a];
3159  R = RS->SRule[r];
3160 
3161  E->ClassWt[R->Rhs] += R->Vote;
3162  TotVote += R->Vote;
3163  TotWeight += 1000.0;
3164 
3165  /* Check whether this is the most specific rule for this class;
3166  resolve ties in favor of rule with higher vote */
3167 
3168  if ( ! E->MostSpec[R->Rhs] ||
3169  R->Cover < E->MostSpec[R->Rhs]->Cover ||
3170  ( R->Cover == E->MostSpec[R->Rhs]->Cover &&
3171  R->Vote > E->MostSpec[R->Rhs]->Vote ) )
3172  {
3173  E->MostSpec[R->Rhs] = R;
3174  }
3175  }
3176 
3177  /* Check for default */
3178 
3179  if ( ! TotWeight ) /* no applicable rules */
3180  {
3181  E->Confidence = 0.5;
3182  return RS->SDefault;
3183  }
3184 
3185  Best = SelectClass(RS->SDefault, false, E->ClassWt);
3186 
3187  /* Set E->Confidence to the maximum of the most specific applicable
3188  rule for class Best or the scaled E->ClassWt[Best] value */
3189 
3190  E->Confidence = Max(E->MostSpec[Best]->Vote / 1000.0,
3191  E->ClassWt[Best] / TotWeight);
3192 
3193 #if defined WIN32 || defined PREDICT
3194  /* Set all confidence values in E->ClassWt */
3195 
3196  E->ClassWt[Best] = E->Confidence;
3197  ForEach(c, 1, MaxClass)
3198  {
3199  if ( c != Best && E->ClassWt[c] > 0 )
3200  {
3201  E->ClassWt[c] =
3202  Min(E->ClassWt[c] / TotWeight, E->MostSpec[c]->Vote / 1000.0);
3203  }
3204  }
3205 #endif
3206 
3207  return Best;
3208 }
3209 
3210 
3211 
3212 /*************************************************************************/
3213 /* */
3214 /* Determine outcome of a test on a case. */
3215 /* Return -1 if value of tested attribute is unknown */
3216 /* */
3217 /*************************************************************************/
3218 
3219 
3221 /* ----------- */
3222 {
3223  DiscrValue v, Outcome;
3224  Attribute Att;
3225 
3226  Att = OneCond->Tested;
3227 
3228  /* Determine the outcome of this test on this case */
3229 
3230  switch ( OneCond->NodeType )
3231  {
3232  case BrDiscr: /* test of discrete attribute */
3233 
3234  v = XDVal(Case, Att);
3235  Outcome = ( v == 0 ? -1 : v );
3236  break;
3237 
3238  case BrThresh: /* test of continuous attribute */
3239 
3240  Outcome = ( Unknown(Case, Att) ? -1 :
3241  NotApplic(Case, Att) ? 1 :
3242  CVal(Case, Att) <= OneCond->Cut ? 2 : 3 );
3243  break;
3244 
3245  case BrSubset: /* subset test on discrete attribute */
3246 
3247  v = XDVal(Case, Att);
3248  Outcome = ( v <= MaxAttVal[Att] && In(v, OneCond->Subset) ?
3249  OneCond->TestValue : 0 );
3250  }
3251 
3252  return Outcome;
3253 }
3254 
3255 
3256 
3257 /*************************************************************************/
3258 /* */
3259 /* Determine whether a case satisfies a condition */
3260 /* */
3261 /*************************************************************************/
3262 
3263 
3265 /* --------- */
3266 {
3267  return ( FindOutcome(Case, OneCond) == OneCond->TestValue );
3268 }
3269 
3270 
3271 
3272 /*************************************************************************/
3273 /* */
3274 /* Determine whether a case satisfies all conditions of a rule */
3275 /* */
3276 /*************************************************************************/
3277 
3278 
3280 /* ------- */
3281 {
3282  int d;
3283 
3284  ForEach(d, 1, R->Size)
3285  {
3286  if ( ! Satisfies(Case, R->Lhs[d]) )
3287  {
3288  return false;
3289  }
3290  }
3291 
3292  return true;
3293 }
3294 
3295 
3296 
3297 /*************************************************************************/
3298 /* */
3299 /* Make sure that Active[] has space for at least N rules */
3300 /* */
3301 /*************************************************************************/
3302 
3303 
3304 void CheckActiveSpace(int N, CEnv E)
3305 /* ---------------- */
3306 {
3307  if ( E->ActiveSpace <= N )
3308  {
3309  Realloc(E->Active, (E->ActiveSpace=N+1), RuleNo);
3310  }
3311 }
3312 
3313 
3314 
3315 /*************************************************************************/
3316 /* */
3317 /* Use RT to enter active rules in Active[] */
3318 /* */
3319 /*************************************************************************/
3320 
3321 
3323 /* ---------- */
3324 {
3325  DiscrValue v;
3326  int ri;
3327  RuleNo r;
3328 
3329  if ( ! RT ) return;
3330 
3331  /* Enter any rules satisfied at this node */
3332 
3333  if ( RT->Fire )
3334  {
3335  for ( ri = 0 ; (r = RT->Fire[ri]) ; ri++ )
3336  {
3337  E->Active[E->NActive++] = r;
3338  }
3339  }
3340 
3341  if ( ! RT->Branch ) return;
3342 
3343  /* Explore subtree for rules that include condition at this node */
3344 
3345  if ( (v = FindOutcome(Case, RT->CondTest)) > 0 && v <= RT->Forks )
3346  {
3347  MarkActive(RT->Branch[v], Case, E);
3348  }
3349 
3350  /* Explore default subtree for rules that do not include condition */
3351 
3352  MarkActive(RT->Branch[0], Case, E);
3353 }
3354 
3355 
3356 
3357 /*************************************************************************/
3358 /* */
3359 /* Classify a case using boosted tree or rule sequence */
3360 /* */
3361 /*************************************************************************/
3362 
3363 
3365 /* ------------- */
3366 {
3367  ClassNo c, Best;
3368  int t;
3369  double Total=0;
3370 
3371  ForEach(c, 1, MaxClass)
3372  {
3373  E->Vote[c] = 0;
3374  }
3375 
3376  ForEach(t, 0, MaxTrial)
3377  {
3378  Best = ( RULES ? RuleClassify(Case, RuleSet[t], E) :
3379  TreeClassify(Case, Pruned[t], E) );
3380 
3381  E->Vote[Best] += E->Confidence;
3382  Total += E->Confidence;
3383 
3384  }
3385 
3386  /* Copy normalised votes into E->ClassWt */
3387 
3388  ForEach(c, 1, MaxClass)
3389  {
3390  E->ClassWt[c] = E->Vote[c] / Total;
3391  }
3392 
3393  Best = SelectClass(Default, false, E->ClassWt);
3394 
3395  E->Confidence = E->ClassWt[Best];
3396 
3397  return Best;
3398 }
3399 
3400 
3401 
3402 /*************************************************************************/
3403 /* */
3404 /* Select the best class to return. Take misclassification costs */
3405 /* into account if they are defined. */
3406 /* */
3407 /*************************************************************************/
3408 
3409 
3410 ClassNo SelectClass(ClassNo Default, Boolean UseCosts, double *Prob)
3411 /* ----------- */
3412 {
3413  ClassNo c, BestClass;
3414  double ExpCost, BestCost=1E10;
3415 
3416  BestClass = Default;
3417 
3418  if ( UseCosts )
3419  {
3420  ForEach(c, 1, MaxClass)
3421  {
3422  if ( ! Prob[c] ) continue;
3423 
3424  ExpCost = MisclassCost(Prob, c);
3425 
3426  if ( ExpCost < BestCost )
3427  {
3428  BestClass = c;
3429  BestCost = ExpCost;
3430  }
3431  }
3432  }
3433  else
3434  {
3435  ForEach(c, 1, MaxClass)
3436  {
3437  if ( Prob[c] > Prob[BestClass] ) BestClass = c;
3438  }
3439  }
3440 
3441  return BestClass;
3442 }
3443 
3444 
3445 
3446 /*************************************************************************/
3447 /* */
3448 /* Find total misclassification cost of choosing class C */
3449 /* for cases in LocalFreq[] */
3450 /* */
3451 /*************************************************************************/
3452 
3453 
3454 double MisclassCost(double *LocalFreq, ClassNo C)
3455 /* ------------ */
3456 {
3457  double ExpCost=0;
3458  ClassNo c;
3459 
3460  ForEach(c, 1, MaxClass)
3461  {
3462  if ( c != C )
3463  {
3464  ExpCost += LocalFreq[c] * MCost[C][c];
3465  }
3466  }
3467 
3468  return ExpCost;
3469 }
3470 
3471 
3472 
3473 /*************************************************************************/
3474 /* */
3475 /* General classification routine */
3476 /* */
3477 /*************************************************************************/
3478 
3479 
3481 /* -------- */
3482 {
3483  E->NRulesUsed = 0;
3484 
3485  return ( TRIALS > 1 ? BoostClassify(Case, TRIALS-1, E) :
3486  RULES ? RuleClassify(Case, RuleSet[0], E) :
3487  TreeClassify(Case, Pruned[0], E) );
3488 }
3489 
3490 
3491 
3492 /*************************************************************************/
3493 /* */
3494 /* Interpolate a single value between Lower, Cut and Upper */
3495 /* */
3496 /*************************************************************************/
3497 
3498 
3500 /* ----------- */
3501 {
3502  return ( Val <= T->Lower ? 1.0 :
3503  Val >= T->Upper ? 0.0 :
3504  Val <= T->Cut ?
3505  1 - 0.5 * (Val - T->Lower) / (T->Cut - T->Lower + 1E-10) :
3506  0.5 * (Val - T->Upper) / (T->Cut - T->Upper + 1E-10) );
3507 }
3508 
3509 
3510 
3511 /*************************************************************************/
3512 /* */
3513 /* Open file with given extension for read/write */
3514 /* */
3515 /*************************************************************************/
3516 
3517 
3518 FILE *GetFile(String Extension, String RW)
3519 /* -------- */
3520 {
3521  strcpy(Fn, FileStem);
3522  strcat(Fn, Extension);
3523  return fopen(Fn, RW);
3524 }
3525 
3526 
3527 
3528 /*************************************************************************/
3529 /* */
3530 /* Check whether file is open. If it is not, open it and */
3531 /* read/write discrete names */
3532 /* */
3533 /*************************************************************************/
3534 
3535 
3536 void CheckFile(String Extension, Boolean Write)
3537 /* --------- */
3538 {
3539  static char *LastExt="";
3540 
3541  if ( ! TRf || strcmp(LastExt, Extension) )
3542  {
3543  LastExt = Extension;
3544 
3545  if ( TRf )
3546  {
3547  fprintf(TRf, "\n");
3548  fclose(TRf);
3549  }
3550 
3551  ReadFilePrefix(Extension);
3552  }
3553 }
3554 
3555 
3556 
3557 /*************************************************************************/
3558 /* */
3559 /* Specialised form of the getopt() utility */
3560 /* */
3561 /*************************************************************************/
3562 
3564 
3565 
3566 char ProcessOption(int Argc, char *Argv[], char *Options)
3567 /* ------------- */
3568 {
3569  int i;
3570  static int OptNo=1;
3571 
3572  if ( OptNo >= Argc ) return '\00';
3573 
3574  if ( *(Option = Argv[OptNo++]) != '-' ) return '?';
3575 
3576  for ( i = 0 ; Options[i] ; i++ )
3577  {
3578  if ( Options[i] == Option[1] )
3579  {
3580  OptArg = (char *) ( Options[i+1] != '+' ? Nil :
3581  Option[2] ? Option+2 :
3582  OptNo < Argc ? Argv[OptNo++] : "0" );
3583  return Option[1];
3584  }
3585  }
3586 
3587  return '?';
3588 }
3589 
3590 
3591 
3592 /*************************************************************************/
3593 /* */
3594 /* Protected memory allocation routines */
3595 /* */
3596 /*************************************************************************/
3597 
3598 
3599 
3600 void *Pmalloc(size_t Bytes)
3601 /* ------- */
3602 {
3603  void *p=Nil;
3604 
3605  if ( ! Bytes || (p = (void *) malloc(Bytes)) )
3606  {
3607  return p;
3608  }
3609 
3610  Error(NOMEM, "", "");
3611 
3612 #ifdef WIN32
3613  return Nil;
3614 #endif
3615 }
3616 
3617 
3618 
3619 void *Prealloc(void *Present, size_t Bytes)
3620 /* -------- */
3621 {
3622  void *p=Nil;
3623 
3624  if ( ! Bytes ) return Nil;
3625 
3626  if ( ! Present ) return Pmalloc(Bytes);
3627 
3628  if ( (p = (void *) realloc(Present, Bytes)) )
3629  {
3630  return p;
3631  }
3632 
3633  Error(NOMEM, "", "");
3634 
3635 #ifdef WIN32
3636  return Nil;
3637 #endif
3638 }
3639 
3640 
3641 
3642 void *Pcalloc(size_t Number, unsigned int Size)
3643 /* ------- */
3644 {
3645  void *p=Nil;
3646 
3647  if ( ! Number || (p = (void *) calloc(Number, Size)) )
3648  {
3649  return p;
3650  }
3651 
3652  Error(NOMEM, "", "");
3653 
3654 #ifdef WIN32
3655  return Nil;
3656 #endif
3657 }
3658 
3659 
3660 
3661 /*************************************************************************/
3662 /* */
3663 /* Error messages */
3664 /* */
3665 /*************************************************************************/
3666 
3667 
3668 void Error(int ErrNo, String S1, String S2)
3669 /* ----- */
3670 {
3671  Boolean Quit=false, WarningOnly=false;
3672  char Buffer[10000], *Msg=Buffer;
3673 
3674 #ifdef WIN32
3675  if ( ErrNo == NOMEM )
3676  {
3677  MessageBox(NULL, "Cannot allocate sufficient memory", "Fatal Error",
3678  MB_ICONERROR | MB_OK);
3679  Goodbye(1);
3680  }
3681  else
3682  if ( ErrNo == MODELFILE )
3683  {
3684  if ( ! ErrMsgs )
3685  {
3686  sprintf(Msg, "File %s is incompatible with .names file\n(%s `%s')",
3687  Fn, S1, S2);
3688  MessageBox(NULL, Msg, "Cannot Load Classifier",
3689  MB_ICONERROR | MB_OK);
3690  }
3691  ErrMsgs++;
3692  return;
3693  }
3694 #endif
3695 
3696  if ( Of ) fprintf(Of, "\n");
3697 
3698  if ( ErrNo == NOFILE || ErrNo == NOMEM || ErrNo == MODELFILE )
3699  {
3700  sprintf(Msg, "*** ");
3701  }
3702  else
3703  {
3704  sprintf(Msg, TX_Line(LineNo, Fn));
3705  }
3706  Msg += strlen(Buffer);
3707 
3708  switch ( ErrNo )
3709  {
3710  case NOFILE:
3711  sprintf(Msg, E_NOFILE(Fn, S2));
3712  Quit = true;
3713  break;
3714 
3715  case BADCLASSTHRESH:
3716  sprintf(Msg, E_BADCLASSTHRESH, S1);
3717  break;
3718 
3719  case LEQCLASSTHRESH:
3720  sprintf(Msg, E_LEQCLASSTHRESH, S1);
3721  break;
3722 
3723  case BADATTNAME:
3724  sprintf(Msg, E_BADATTNAME, S1);
3725  break;
3726 
3727  case EOFINATT:
3728  sprintf(Msg, E_EOFINATT, S1);
3729  break;
3730 
3731  case SINGLEATTVAL:
3732  sprintf(Msg, E_SINGLEATTVAL(S1, S2));
3733  break;
3734 
3735  case DUPATTNAME:
3736  sprintf(Msg, E_DUPATTNAME, S1);
3737  break;
3738 
3739  case CWTATTERR:
3740  sprintf(Msg, E_CWTATTERR);
3741  break;
3742 
3743  case BADATTVAL:
3744  sprintf(Msg, E_BADATTVAL(S2, S1));
3745  break;
3746 
3747  case BADNUMBER:
3748  sprintf(Msg, E_BADNUMBER(S1));
3749  break;
3750 
3751  case BADCLASS:
3752  sprintf(Msg, E_BADCLASS, S2);
3753  break;
3754 
3755  case BADCOSTCLASS:
3756  sprintf(Msg, E_BADCOSTCLASS, S1);
3757  Quit = true;
3758  break;
3759 
3760  case BADCOST:
3761  sprintf(Msg, E_BADCOST, S1);
3762  Quit = true;
3763  break;
3764 
3765  case NOMEM:
3766  sprintf(Msg, E_NOMEM);
3767  Quit = true;
3768  break;
3769 
3770  case TOOMANYVALS:
3771  sprintf(Msg, E_TOOMANYVALS(S1, (int) (long) S2));
3772  break;
3773 
3774  case BADDISCRETE:
3775  sprintf(Msg, E_BADDISCRETE, S1);
3776  break;
3777 
3778  case NOTARGET:
3779  sprintf(Msg, E_NOTARGET, S1);
3780  Quit = true;
3781  break;
3782 
3783  case BADCTARGET:
3784  sprintf(Msg, E_BADCTARGET, S1);
3785  Quit = true;
3786  break;
3787 
3788  case BADDTARGET:
3789  sprintf(Msg, E_BADDTARGET, S1);
3790  Quit = true;
3791  break;
3792 
3793  case LONGNAME:
3794  sprintf(Msg, E_LONGNAME);
3795  Quit = true;
3796  break;
3797 
3798  case HITEOF:
3799  sprintf(Msg, E_HITEOF);
3800  break;
3801 
3802  case MISSNAME:
3803  sprintf(Msg, E_MISSNAME, S2);
3804  break;
3805 
3806  case BADTSTMP:
3807  sprintf(Msg, E_BADTSTMP(S2, S1));
3808  break;
3809 
3810  case BADDATE:
3811  sprintf(Msg, E_BADDATE(S2, S1));
3812  break;
3813 
3814  case BADTIME:
3815  sprintf(Msg, E_BADTIME(S2, S1));
3816  break;
3817 
3818  case UNKNOWNATT:
3819  sprintf(Msg, E_UNKNOWNATT, S1);
3820  break;
3821 
3822  case BADDEF1:
3823  sprintf(Msg, E_BADDEF1(AttName[MaxAtt], S1, S2));
3824  break;
3825 
3826  case BADDEF2:
3827  sprintf(Msg, E_BADDEF2(AttName[MaxAtt], S1, S2));
3828  break;
3829 
3830  case SAMEATT:
3831  sprintf(Msg, E_SAMEATT(AttName[MaxAtt], S1));
3832  WarningOnly = true;
3833  break;
3834 
3835  case BADDEF3:
3836  sprintf(Msg, E_BADDEF3, AttName[MaxAtt]);
3837  break;
3838 
3839  case BADDEF4:
3840  sprintf(Msg, E_BADDEF4, AttName[MaxAtt]);
3841  WarningOnly = true;
3842  break;
3843 
3844  case MODELFILE:
3845  sprintf(Msg, EX_MODELFILE(Fn));
3846  sprintf(Msg, " (%s `%s')\n", S1, S2);
3847  Quit = true;
3848  break;
3849  }
3850 
3851 #ifdef WIN32
3852  if ( Of )
3853  {
3854  fprintf(Of, Buffer);
3855  }
3856  else
3857  if ( ErrMsgs <= 10 )
3858  {
3859  MessageBox(NULL, Buffer, ( WarningOnly ? "Warning" : "Error" ), MB_OK);
3860  }
3861 #else
3862  fprintf(Of, Buffer);
3863 #endif
3864 
3865  if ( ! WarningOnly ) ErrMsgs++;
3866 
3867  if ( ErrMsgs == 10 )
3868  {
3869 #if defined WIN32 && ! defined _CONSOLE
3870  MessageBox(NULL, T_ErrorLimit, "Too many errors!", MB_OK);
3871 #else
3872  fprintf(Of, T_ErrorLimit);
3873 #endif
3874  Quit = true;
3875  }
3876 
3877  if ( Quit && Of )
3878  {
3879  Goodbye(1);
3880  }
3881 }
3882 
3883 
3884 
3885 /*************************************************************************/
3886 /* */
3887 /* Determine precision of floating value */
3888 /* */
3889 /*************************************************************************/
3890 
3891 
3893 /* ----------- */
3894 {
3895  double RoundErr, Accuracy;
3896  int Mult;
3897 
3898  Accuracy = fabs(Val) * 1E-6; /* approximate */
3899  Val = modf(Val, &RoundErr);
3900 
3901  for ( Mult = 100000 ; Mult >= 1 ; Mult /= 10 )
3902  {
3903  RoundErr = fabs(rint(Val * Mult) / Mult - Val);
3904  if ( RoundErr > 2 * Accuracy )
3905  {
3906  return Mult * 10;
3907  }
3908  }
3909 
3910  return 1;
3911 }
3912 
3913 
3914 
3915 /*************************************************************************/
3916 /* */
3917 /* Routines to process dates (algorithm due to Gauss), */
3918 /* times, and timestamps */
3919 /* */
3920 /*************************************************************************/
3921 
3922 
3923 int GetInt(String S, int N)
3924 /* ------ */
3925 {
3926  int Result=0;
3927 
3928  while ( N-- )
3929  {
3930  if ( ! isdigit(*S) ) return 0;
3931 
3932  Result = Result * 10 + (*S++ - '0');
3933  }
3934 
3935  return Result;
3936 }
3937 
3938 
3939 
3940 int DateToDay(String DS) /* Day 1 is 0000/03/01 */
3941 /* --------- */
3942 {
3943  int Year, Month, Day;
3944 
3945  if ( strlen(DS) != 10 ) return 0;
3946 
3947  Year = GetInt(DS, 4);
3948  Month = GetInt(DS+5, 2);
3949  Day = GetInt(DS+8, 2);
3950 
3951  if ( ! ( DS[4] == '/' && DS[7] == '/' || DS[4] == '-' && DS[7] == '-' ) ||
3952  Year < 0 || Month < 1 || Day < 1 ||
3953  Month > 12 ||
3954  Day > 31 ||
3955  Day > 30 &&
3956  ( Month == 4 || Month == 6 || Month == 9 || Month == 11 ) ||
3957  Month == 2 &&
3958  ( Day > 29 ||
3959  Day > 28 && ( Year % 4 != 0 ||
3960  Year % 100 == 0 && Year % 400 != 0 ) ) )
3961  {
3962  return 0;
3963  }
3964 
3965  if ( (Month -= 2) <= 0 )
3966  {
3967  Month += 12;
3968  Year -= 1;
3969  }
3970 
3971  return Year * 365 + Year / 4 - Year / 100 + Year / 400
3972  + 367 * Month / 12
3973  + Day - 30;
3974 }
3975 
3976 
3977 
3979 /* ---------- */
3980 {
3981  int Hour, Mins, Secs;
3982 
3983  if ( strlen(TS) != 8 ) return -1;
3984 
3985  Hour = GetInt(TS, 2);
3986  Mins = GetInt(TS+3, 2);
3987  Secs = GetInt(TS+6, 2);
3988 
3989  if ( TS[2] != ':' || TS[5] != ':' ||
3990  Hour >= 24 || Mins >= 60 || Secs >= 60 )
3991  {
3992  return -1;
3993  }
3994 
3995  return Hour * 3600 + Mins * 60 + Secs;
3996 }
3997 
3998 
3999 
4000 void SetTSBase(int y)
4001 /* --------- */
4002 {
4003  y -= 15;
4004  TSBase = y * 365 + y / 4 - y / 100 + y / 400 + (367 * 4) / 12 + 1 - 30;
4005 }
4006 
4007 
4008 
4010 /* ------------ */
4011 {
4012  int Day, Sec, i;
4013 
4014  /* Check for reasonable length and space between date and time */
4015 
4016  if ( strlen(TS) < 19 || ! Space(TS[10]) ) return (1 << 30);
4017 
4018  /* Read date part */
4019 
4020  TS[10] = '\00';
4021  Day = DateToDay(TS);
4022  TS[10] = ' ';
4023 
4024  /* Skip one or more spaces */
4025 
4026  for ( i = 11 ; TS[i] && Space(TS[i]) ; i++ )
4027  ;
4028 
4029  /* Read time part */
4030 
4031  Sec = TimeToSecs(TS+i);
4032 
4033  /* Return a long time in the future if there is an error */
4034 
4035  return ( Day < 1 || Sec < 0 ? (1 << 30) :
4036  (Day - TSBase) * 1440 + (Sec + 30) / 60 );
4037 }
4038 
4039 
4040 
4041 /*************************************************************************/
4042 /* */
4043 /* Free case space */
4044 /* */
4045 /*************************************************************************/
4046 
4047 
4049 /* ------------ */
4050 {
4051  free(&DVec[-1]);
4052  IValsOffset = 0;
4053 }
4054 
4055 
4056 
4057 /*************************************************************************/
4058 /* */
4059 /* Deallocate the space used to perform classification */
4060 /* */
4061 /*************************************************************************/
4062 
4063 
4065 /* ----------- */
4066 {
4067  /* Free memory allocated for classifier */
4068 
4069  if ( RULES )
4070  {
4071  ForEach(Trial, 0, TRIALS-1)
4072  {
4074  }
4075  free(RuleSet);
4076 
4080  }
4081  else
4082  {
4083  ForEach(Trial, 0, TRIALS-1)
4084  {
4085  FreeTree(Pruned[Trial]);
4086  }
4087  free(Pruned);
4088  }
4089 
4090  // FIXME: This causes a problem so commented out (need to fix memory leak)
4091  //FreeUnlessNil(PropVal);
4092 
4093  /* Free memory allocated for cost matrix */
4094 
4095  if ( MCost )
4096  {
4097  FreeVector((void **) MCost, 1, MaxClass);
4098  }
4099 
4100  /* Free memory for names etc */
4101 
4102  FreeNames();
4104 
4105  free(GCEnv->ClassWt);
4106  free(GCEnv->Vote);
4107  free(GCEnv);
4108 }
4109 
4110 
4111 
4112 /*************************************************************************/
4113 /* */
4114 /* Free up all space allocated by GetNames() */
4115 /* */
4116 /*************************************************************************/
4117 
4118 
4120 /* --------- */
4121 {
4122  Attribute a, t;
4123 
4124  if ( ! AttName ) return;
4125 
4126  ForEach(a, 1, MaxAtt)
4127  {
4128  if ( a != ClassAtt && Discrete(a) )
4129  {
4130  FreeVector((void **) AttValName[a], 1, MaxAttVal[a]);
4131  }
4132  }
4136  FreeVector((void **) AttName, 1, MaxAtt); AttName = Nil;
4137  FreeVector((void **) ClassName, 1, MaxClass); ClassName = Nil;
4138 
4140 
4141  /* Definitions (if any) */
4142 
4143  if ( AttDef )
4144  {
4145  ForEach(a, 1, MaxAtt)
4146  {
4147  if ( AttDef[a] )
4148  {
4149  for ( t = 0 ; DefOp(AttDef[a][t]) != OP_END ; t++ )
4150  {
4151  if ( DefOp(AttDef[a][t]) == OP_STR )
4152  {
4153  Free(DefSVal(AttDef[a][t]));
4154  }
4155  }
4156 
4157  Free(AttDef[a]);
4158  }
4159  }
4160  Free(AttDef); AttDef = Nil;
4161  }
4162 }
4163 
4164 
4165 
4166 /*************************************************************************/
4167 /* */
4168 /* Free up space taken up by tree Node */
4169 /* */
4170 /*************************************************************************/
4171 
4172 
4174 /* -------- */
4175 {
4176  DiscrValue v;
4177 
4178  if ( ! T ) return;
4179 
4180  if ( T->NodeType )
4181  {
4182  ForEach(v, 1, T->Forks)
4183  {
4184  FreeTree(T->Branch[v]);
4185  }
4186 
4187  Free(T->Branch);
4188 
4189  if ( T->NodeType == BrSubset )
4190  {
4191  FreeVector((void **) T->Subset, 1, T->Forks);
4192  }
4193 
4194  }
4195 
4196  Free(T->ClassDist);
4197  Free(T);
4198 }
4199 
4200 
4201 
4202 /*************************************************************************/
4203 /* */
4204 /* Deallocate the space used to store rules */
4205 /* */
4206 /*************************************************************************/
4207 
4208 
4210 /* -------- */
4211 {
4212  int d;
4213 
4214  ForEach(d, 1, R->Size)
4215  {
4216  if ( R->Lhs[d]->NodeType == BrSubset )
4217  {
4218  FreeUnlessNil(R->Lhs[d]->Subset);
4219  }
4220  FreeUnlessNil(R->Lhs[d]);
4221  }
4222  FreeUnlessNil(R->Lhs);
4223  FreeUnlessNil(R);
4224 }
4225 
4226 
4227 
4229 /* ------------ */
4230 {
4231  int b;
4232 
4233  if ( ! RT ) return;
4234 
4235  if ( RT->Branch )
4236  {
4237  ForEach(b, 0, RT->Forks )
4238  {
4239  FreeRuleTree(RT->Branch[b]);
4240  }
4241  Free(RT->Branch);
4242  }
4243 
4244  /* Don't free RT->Cond since this is just a pointer to a condition
4245  in one of the rules */
4246 
4247  FreeUnlessNil(RT->Fire);
4248  Free(RT);
4249 }
4250 
4251 
4252 
4254 /* --------- */
4255 {
4256  int ri;
4257 
4258  ForEach(ri, 1, RS->SNRules)
4259  {
4260  FreeRule(RS->SRule[ri]);
4261  }
4262  Free(RS->SRule);
4263  FreeRuleTree(RS->RT);
4264  Free(RS);
4265 }
4266 
4267 
4268 
4269 void FreeVector(void **V, int First, int Last)
4270 /* ---------- */
4271 {
4272  if ( V )
4273  {
4274  while ( First <= Last )
4275  {
4276  FreeUnlessNil(V[First]);
4277  First++;
4278  }
4279 
4280  Free(V);
4281  }
4282 }