mageec  0.1.0
MAchine Guided Energy Efficient Compilation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
getnames.c
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Copyright 2010 Rulequest Research Pty Ltd. */
4 /* */
5 /* This file is part of C5.0 GPL Edition, a single-threaded version */
6 /* of C5.0 release 2.07. */
7 /* */
8 /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9 /* modify it under the terms of the GNU General Public License as */
10 /* published by the Free Software Foundation, either version 3 of the */
11 /* License, or (at your option) any later version. */
12 /* */
13 /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16 /* General Public License for more details. */
17 /* */
18 /* You should have received a copy of the GNU General Public License */
19 /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20 /* */
21 /* <http://www.gnu.org/licenses/>. */
22 /* */
23 /*************************************************************************/
24 
25 
26 
27 /*************************************************************************/
28 /* */
29 /* Get names of classes, attributes and attribute values */
30 /* ----------------------------------------------------- */
31 /* */
32 /*************************************************************************/
33 
34 
35 #include "defns.i"
36 #include "extern.i"
37 
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 
41 #define MAXLINEBUFFER 10000
44 
45 
46 
47 /*************************************************************************/
48 /* */
49 /* Read a name from file f into string s, setting Delimiter. */
50 /* */
51 /* - Embedded periods are permitted, but periods followed by space */
52 /* characters act as delimiters. */
53 /* - Embedded spaces are permitted, but multiple spaces are */
54 /* replaced by a single space. */
55 /* - Any character can be escaped by '\'. */
56 /* - The remainder of a line following '|' is ignored. */
57 /* */
58 /* Colons are sometimes delimiters depending on ColonOpt */
59 /* */
60 /*************************************************************************/
61 
62 
63 Boolean ReadName(FILE *f, String s, int n, char ColonOpt)
64 /* -------- */
65 {
66  register char *Sp=s;
67  register int c;
68  char Msg[2];
69 
70  /* Skip to first non-space character */
71 
72  while ( (c = InChar(f)) == '|' || Space(c) )
73  {
74  if ( c == '|' ) SkipComment;
75  }
76 
77  /* Return false if no names to read */
78 
79  if ( c == EOF )
80  {
81  Delimiter = EOF;
82  return false;
83  }
84 
85  /* Read in characters up to the next delimiter */
86 
87  while ( c != ColonOpt && c != ',' && c != '\n' && c != '|' && c != EOF )
88  {
89  if ( --n <= 0 )
90  {
91  if ( Of ) Error(LONGNAME, "", "");
92  }
93 
94  if ( c == '.' )
95  {
96  if ( (c = InChar(f)) == '|' || Space(c) || c == EOF ) break;
97  *Sp++ = '.';
98  continue;
99  }
100 
101  if ( c == '\\' )
102  {
103  c = InChar(f);
104  }
105 
106  if ( Space(c) )
107  {
108  *Sp++ = ' ';
109 
110  while ( ( c = InChar(f) ) == ' ' || c == '\t' )
111  ;
112  }
113  else
114  {
115  *Sp++ = c;
116  c = InChar(f);
117  }
118  }
119 
120  if ( c == '|' ) SkipComment;
121  Delimiter = c;
122 
123  /* Special case for ':=' */
124 
125  if ( Delimiter == ':' )
126  {
127  if ( *LBp == '=' )
128  {
129  Delimiter = '=';
130  LBp++;
131  }
132  }
133 
134  /* Strip trailing spaces */
135 
136  while ( Sp > s && Space(*(Sp-1)) ) Sp--;
137 
138  if ( Sp == s )
139  {
140  Msg[0] = ( Space(c) ? '.' : c );
141  Msg[1] = '\00';
142  Error(MISSNAME, Fn, Msg);
143  }
144 
145  *Sp++ = '\0';
146  return true;
147 }
148 
149 
150 
151 /*************************************************************************/
152 /* */
153 /* Read names of classes, attributes and legal attribute values. */
154 /* On completion, names are stored in: */
155 /* ClassName - class names */
156 /* AttName - attribute names */
157 /* AttValName - attribute value names */
158 /* with: */
159 /* MaxAttVal - number of values for each attribute */
160 /* */
161 /* Other global variables set are: */
162 /* MaxAtt - maximum attribute number */
163 /* MaxClass - maximum class number */
164 /* MaxDiscrVal - maximum discrete values for an attribute */
165 /* */
166 /*************************************************************************/
167 
168 
169 void GetNames(FILE *Nf)
170 /* -------- */
171 {
 /* Reads the names file Nf: first the class entry, then one entry per
    attribute until ReadName reports end of input or an
    "attributes ...cluded" entry is met.  Nf is closed before returning
    and Goodbye(1) is called if ErrMsgs is non-zero.

    NOTE(review): this copy of the function comes from a documentation
    listing in which several source lines are absent (listing lines 228,
    306, 346, 355, 361, 371-372, 379 and 408).  The gaps are marked below;
    restore them from the original source before compiling. */
172  char Buffer[1000]="", *EndBuff;
173  int AttCeiling=100, ClassCeiling=100;
174  Attribute Att;
175  ClassNo c;
176 
177  ErrMsgs = AttExIn = 0;
178  LineNo = 0;
179  LBp = LineBuffer;
180  *LBp = 0;
181 
182  MaxClass = ClassAtt = LabelAtt = CWtAtt = 0;
183 
184  /* Get class names from names file. This entry can be:
185  - a list of discrete values separated by commas
186  - the name of the discrete attribute to use as the class
187  - the name of a continuous attribute followed by a colon and
188  a comma-separated list of thresholds used to segment it */
189 
190  ClassName = AllocZero(ClassCeiling, String);
191  do
192  {
193  ReadName(Nf, Buffer, 1000, ':');
194 
 /* MaxClass is incremented before use, so names are stored from index 1
    and the array is grown before index MaxClass could reach the ceiling */
195  if ( ++MaxClass >= ClassCeiling)
196  {
197  ClassCeiling += 100;
198  Realloc(ClassName, ClassCeiling, String);
199  }
200  ClassName[MaxClass] = strdup(Buffer);
201  }
202  while ( Delimiter == ',' );
203 
204  if ( Delimiter == ':' )
205  {
206  /* Thresholds for continuous class attribute */
207 
208  ClassThresh = Alloc(ClassCeiling, ContValue);
209  MaxClass = 0;
210 
211  do
212  {
213  ReadName(Nf, Buffer, 1000, ':');
214 
215  if ( ++MaxClass >= ClassCeiling)
216  {
217  ClassCeiling += 100;
218  Realloc(ClassThresh, ClassCeiling, ContValue);
219  }
220 
 /* The whole of Buffer must parse as a number: no conversion at all, or
    trailing characters after the number, is reported as BADCLASSTHRESH */
221  ClassThresh[MaxClass] = strtod(Buffer, &EndBuff);
222  if ( EndBuff == Buffer || *EndBuff != '\0' )
223  {
224  Error(BADCLASSTHRESH, Buffer, Nil);
225  }
226  else
 /* NOTE(review): listing line 228, the remainder of this condition, is
    missing here -- presumably a comparison with the previous threshold,
    given the LEQCLASSTHRESH error below; confirm against the original */
227  if ( MaxClass > 1 &&
229  {
230  Error(LEQCLASSTHRESH, Buffer, Nil);
231  }
232  }
233  while ( Delimiter == ',' );
234  }
235 
236  /* Get attribute and attribute value names from names file */
237 
 /* Six parallel arrays indexed by attribute number; all start with
    AttCeiling entries and are grown together further down */
238  AttName = AllocZero(AttCeiling, String);
239  MaxAttVal = AllocZero(AttCeiling, DiscrValue);
240  AttValName = AllocZero(AttCeiling, String *);
241  SpecialStatus = AllocZero(AttCeiling, char);
242  AttDef = AllocZero(AttCeiling, Definition);
243  AttDefUses = AllocZero(AttCeiling, Attribute *);
244 
245  MaxAtt = 0;
246  while ( ReadName(Nf, Buffer, 1000, ':') )
247  {
248  if ( Delimiter != ':' && Delimiter != '=' )
249  {
250  Error(BADATTNAME, Buffer, "");
251  }
252 
253  /* Check for attributes included/excluded */
254 
 /* Matches a name that starts with "attributes " (either case for the
    first letter) and ends with "cluded"; the two characters eight from
    the end then select AttExIn = 1 for "in" and -1 for anything else */
255  if ( ( *Buffer == 'a' || *Buffer == 'A' ) &&
256  ! memcmp(Buffer+1, "ttributes ", 10) &&
257  ! memcmp(Buffer+strlen(Buffer)-6, "cluded", 6) )
258  {
259  AttExIn = ( ! memcmp(Buffer+strlen(Buffer)-8, "in", 2) ? 1 : -1 );
 /* For an inclusion list every attribute read so far is first marked
    SKIP; the listed ones have the mark removed in the loop below */
260  if ( AttExIn == 1 )
261  {
262  ForEach(Att, 1, MaxAtt)
263  {
264  SpecialStatus[Att] |= SKIP;
265  }
266  }
267 
268  while ( ReadName(Nf, Buffer, 1000, ':') )
269  {
270  Att = Which(Buffer, AttName, 1, MaxAtt);
271  if ( ! Att )
272  {
273  Error(UNKNOWNATT, Buffer, Nil);
274  }
275  else
276  if ( AttExIn == 1 )
277  {
 /* NOTE(review): subtraction only clears SKIP while the bit is set; an
    attribute named twice in an inclusion list would be decremented twice.
    Confirm whether a mask-and-clear was intended */
278  SpecialStatus[Att] -= SKIP;
279  }
280  else
281  {
282  SpecialStatus[Att] |= SKIP;
283  }
284  }
285 
 /* The inner loop has consumed the rest of the input, so attribute
    parsing ends here */
286  break;
287  }
288 
289  if ( Which(Buffer, AttName, 1, MaxAtt) > 0 )
290  {
291  Error(DUPATTNAME, Buffer, Nil);
292  }
293 
294  if ( ++MaxAtt >= AttCeiling )
295  {
296  AttCeiling += 100;
297  Realloc(AttName, AttCeiling, String);
298  Realloc(MaxAttVal, AttCeiling, DiscrValue);
299  Realloc(AttValName, AttCeiling, String *);
300  Realloc(SpecialStatus, AttCeiling, char);
301  Realloc(AttDef, AttCeiling, Definition);
302  Realloc(AttDefUses, AttCeiling, Attribute *);
303  }
304 
305  AttName[MaxAtt] = strdup(Buffer);
 /* NOTE(review): listing line 306 is missing from this extract */
307  AttDef[MaxAtt] = Nil;
308  MaxAttVal[MaxAtt] = 0;
309  AttDefUses[MaxAtt] = Nil;
310 
 /* '=' is the delimiter ReadName reports for ":=": the attribute is
    handled by ImplicitAtt rather than ExplicitAtt */
311  if ( Delimiter == '=' )
312  {
313  if ( MaxClass == 1 && ! strcmp(ClassName[1], AttName[MaxAtt]) )
314  {
315  Error(BADDEF3, Nil, Nil);
316  }
317 
318  ImplicitAtt(Nf);
319  ListAttsUsed();
320  }
321  else
322  {
323  ExplicitAtt(Nf);
324  }
325 
326  /* Check for case weight attribute, which must be type continuous */
327 
328  if ( ! strcmp(AttName[MaxAtt], "case weight") )
329  {
330  CWtAtt = MaxAtt;
331 
332  if ( ! Continuous(CWtAtt) )
333  {
334  Error(CWTATTERR, "", "");
335  }
336  }
337  }
338 
339  /* Check whether class is one of the attributes */
340 
341  if ( MaxClass == 1 || ClassThresh )
342  {
343  /* Class attribute must be present and must be either
344  a discrete attribute or a thresholded continuous attribute */
345 
 /* NOTE(review): listing line 346 is missing -- presumably the statement
    that assigns ClassAtt, which is still 0 from the top of the function
    on the lines shown; confirm against the original */
347 
348  if ( ClassAtt <= 0 || Exclude(ClassAtt) )
349  {
350  Error(NOTARGET, ClassName[1], "");
351  }
352  else
 /* NOTE(review): listing line 355 (rest of this condition) is missing */
353  if ( ClassThresh &&
354  ( ! Continuous(ClassAtt) ||
356  {
357  Error(BADCTARGET, ClassName[1], "");
358  }
359  else
 /* NOTE(review): listing line 361 (rest of this condition) is missing */
360  if ( ! ClassThresh &&
362  {
363  Error(BADDTARGET, ClassName[1], "");
364  }
365 
366  Free(ClassName[1]);
367 
368  if ( ! ClassThresh )
369  {
370  Free(ClassName);
 /* NOTE(review): listing lines 371-372 are missing.  ClassName has just
    been freed and is indexed again at the end of this function, so the
    missing lines presumably assign it (and MaxClass) afresh -- confirm */
373  }
374  else
375  {
376  /* Set up class names as segments of continuous target att */
377 
378  MaxClass++;
 /* NOTE(review): listing line 379 is missing -- presumably resizes
    ClassName for the MaxClass entries written below; confirm */
380 
 /* NOTE(review): these sprintf calls are not length-checked.  The
    attribute name alone can be up to 999 characters (it was read through
    the same 1000-byte Buffer), so name plus formatted thresholds could
    exceed Buffer */
381  sprintf(Buffer, "%s <= %g", AttName[ClassAtt], ClassThresh[1]);
382  ClassName[1] = strdup(Buffer);
383 
384  ForEach(c, 2, MaxClass-1)
385  {
386  sprintf(Buffer, "%g < %s <= %g",
387  ClassThresh[c-1], AttName[ClassAtt], ClassThresh[c]);
388  ClassName[c] = strdup(Buffer);
389  }
390 
391  sprintf(Buffer, "%s > %g",
392  AttName[ClassAtt], ClassThresh[MaxClass-1]);
393  ClassName[MaxClass] = strdup(Buffer);
394  }
395  }
396 
397  /* Ignore case weight attribute if it is excluded; otherwise,
398  it cannot be used in models */
399 
400  if ( CWtAtt )
401  {
402  if ( Skip(CWtAtt) )
403  {
404  CWtAtt = 0;
405  }
406  else
407  {
 /* NOTE(review): listing line 408, the body of this branch, is missing */
409  }
410  }
411 
 /* Slot 0 holds a string literal rather than an allocated string;
    FreeNames releases ClassName entries from index 1 only */
412  ClassName[0] = "?";
413 
414  fclose(Nf);
415 
416  if ( ErrMsgs > 0 ) Goodbye(1);
417 }
418 
419 
420 
421 /*************************************************************************/
422 /* */
423 /* Continuous or discrete attribute */
424 /* */
425 /*************************************************************************/
426 
427 
428 void ExplicitAtt(FILE *Nf)
429 /* ----------- */
430 {
 /* Reads the description of attribute MaxAtt from Nf.  If the first name
    read is not followed by a comma it is treated as a type keyword;
    otherwise it is the first of an explicit list of discrete values.

    NOTE(review): this copy comes from a documentation listing in which
    several source lines are absent (listing lines 455, 469, 474, 479,
    486, 496, 502 and 534), leaving most keyword branches empty.  They
    presumably set SpecialStatus[MaxAtt]; restore them from the original
    source before compiling. */
431  char Buffer[1000]="", *p;
432  DiscrValue v;
433  int ValCeiling=100, BaseYear;
434  time_t clock;
435 
436  /* Read attribute type or first discrete value */
437 
438  if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) )
439  {
440  Error(EOFINATT, AttName[MaxAtt], "");
441  }
442 
443  MaxAttVal[MaxAtt] = 0;
444 
445  if ( Delimiter != ',' )
446  {
447  /* Typed attribute */
448 
449  if ( ! strcmp(Buffer, "continuous") )
450  {
451  }
452  else
453  if ( ! strcmp(Buffer, "timestamp") )
454  {
 /* NOTE(review): listing line 455 is missing */
456 
457  /* Set the base date if not done already */
458 
 /* The base is derived from the current UTC year (gmtime) */
459  if ( ! TSBase )
460  {
461  clock = time(0);
462  BaseYear = gmtime(&clock)->tm_year + 1900;
463  SetTSBase(BaseYear);
464  }
465  }
466  else
467  if ( ! strcmp(Buffer, "date") )
468  {
 /* NOTE(review): listing line 469 is missing */
470  }
471  else
472  if ( ! strcmp(Buffer, "time") )
473  {
 /* NOTE(review): listing line 474 is missing */
475  }
476  else
 /* Prefix match only: the text after "discrete" carries the value count */
477  if ( ! memcmp(Buffer, "discrete", 8) )
478  {
 /* NOTE(review): listing line 479 is missing */
480 
481  /* Read max values and reserve space */
482 
 /* atoi yields 0 when no number follows, which the v < 2 test catches.
    NOTE(review): listing line 486, the body of that test, is missing */
483  v = atoi(&Buffer[8]);
484  if ( v < 2 )
485  {
487  }
488 
 /* Slot 0 of the value-name array is used to stash a count rather than
    a string.  NOTE(review): the casts bind tighter than '+', so this is
    pointer arithmetic on (char *) v rather than a cast of v+1; the
    resulting address value is v+1 either way on common platforms */
489  AttValName[MaxAtt] = Alloc(v+3, String);
490  AttValName[MaxAtt][0] = (char *) (long) v+1;
491  AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A");
492  }
493  else
494  if ( ! strcmp(Buffer, "ignore") )
495  {
 /* NOTE(review): listing line 496 is missing */
497  }
498  else
499  if ( ! strcmp(Buffer, "label") )
500  {
501  LabelAtt = MaxAtt;
 /* NOTE(review): listing line 502 is missing */
503  }
504  else
505  {
506  /* Cannot have only one discrete value for an attribute */
507 
508  Error(SINGLEATTVAL, AttName[MaxAtt], Buffer);
509  }
510  }
511  else
512  {
513  /* Discrete attribute with explicit values */
514 
515  AttValName[MaxAtt] = AllocZero(ValCeiling, String);
516 
517  /* Add "N/A" unless this attribute is the class */
518 
 /* "Is the class" here means: exactly one class name was read and it
    equals this attribute's name */
519  if ( MaxClass > 1 || strcmp(ClassName[1], AttName[MaxAtt]) )
520  {
521  AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A");
522  }
523  else
524  {
525  MaxAttVal[MaxAtt] = 0;
526  }
527 
528  p = Buffer;
529 
530  /* Special check for ordered attribute */
531 
532  if ( ! memcmp(Buffer, "[ordered]", 9) )
533  {
 /* NOTE(review): listing line 534 is missing -- presumably marks the
    attribute as ordered, since Ordered(MaxAtt) is tested below */
535 
 /* Step past the marker and any spaces to reach the first value name */
536  for ( p = Buffer+9 ; Space(*p) ; p++ )
537  ;
538  }
539 
540  /* Record first real explicit value */
541 
542  AttValName[MaxAtt][++MaxAttVal[MaxAtt]] = strdup(p);
543 
544  /* Record remaining values */
545 
546  do
547  {
 /* NOTE(review): if Error returns after an EOFINATT report, the code
    below stores whatever Buffer still holds -- confirm whether
    Error(EOFINATT, ...) can return */
548  if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) )
549  {
550  Error(EOFINATT, AttName[MaxAtt], "");
551  }
552 
553  if ( ++MaxAttVal[MaxAtt] >= ValCeiling )
554  {
555  ValCeiling += 100;
556  Realloc(AttValName[MaxAtt], ValCeiling, String);
557  }
558 
559  AttValName[MaxAtt][MaxAttVal[MaxAtt]] = strdup(Buffer);
560  }
561  while ( Delimiter == ',' );
562 
563  /* Cancel ordered status if <3 real values */
564 
 /* The threshold is 3 rather than 2 because the count normally includes
    the "N/A" entry added above */
565  if ( Ordered(MaxAtt) && MaxAttVal[MaxAtt] <= 3 )
566  {
567  SpecialStatus[MaxAtt] = 0;
568  }
 /* Keep the global maximum number of discrete values up to date */
569  if ( MaxAttVal[MaxAtt] > MaxDiscrVal ) MaxDiscrVal = MaxAttVal[MaxAtt];
570  }
571 }
572 
573 
574 
575 /*************************************************************************/
576 /* */
577 /* Locate value Val in List[First] to List[Last] */
578 /* */
579 /*************************************************************************/
580 
581 
582 int Which(String Val, String *List, int First, int Last)
583 /* ----- */
584 {
585  int n=First;
586 
587  while ( n <= Last && strcmp(Val, List[n]) ) n++;
588 
589  return ( n <= Last ? n : First-1 );
590 }
591 
592 
593 
594 /*************************************************************************/
595 /* */
596 /* Build list of attributes used in current attribute definition */
597 /* AttDefUses[Att][0] = number of atts used */
598 /* AttDefUses[Att][1..] are the atts */
599 /* */
600 /*************************************************************************/
601 
602 
604 /* ------------ */
605 {
 /* NOTE(review): the line carrying this function's signature (listing
    line 603) is missing from this extract.  GetNames calls it as
    ListAttsUsed() with no arguments; restore the signature from the
    original source.

    Works on the definition of the most recently read attribute,
    AttDef[MaxAtt], and fills in AttDefUses[MaxAtt]. */
606  Attribute Att;
607  Boolean *DefUses;
608  Definition D;
609  int e, NUsed=0;
610 
 /* One flag per attribute number 0..MaxAtt, initially all clear */
611  DefUses = AllocZero(MaxAtt+1, Boolean);
612 
613  D = AttDef[MaxAtt];
614 
 /* First pass: walk the definition elements up to the OP_END marker,
    flagging and counting each distinct attribute referenced by OP_ATT */
615  for ( e = 0 ; ; e++ )
616  {
617  if ( DefOp(D[e]) == OP_ATT )
618  {
619  Att = (Attribute) DefSVal(D[e]);
620  if ( ! DefUses[Att] )
621  {
622  DefUses[Att] = true;
623  NUsed++;
624  }
625  }
626  else
627  if ( DefOp(D[e]) == OP_END )
628  {
629  break;
630  }
631  }
632 
 /* Second pass: store the count in slot 0 followed by the flagged
    attributes in ascending order.  AttDefUses[MaxAtt] is left untouched
    when nothing is referenced.
    NOTE(review): the scan stops at MaxAtt-1, so a reference to MaxAtt
    itself would be counted above but not listed here -- presumably a
    definition cannot refer to the attribute being defined; confirm */
633  if ( NUsed )
634  {
635  AttDefUses[MaxAtt] = Alloc(NUsed+1, Attribute);
636  AttDefUses[MaxAtt][0] = NUsed;
637 
638  NUsed=0;
639  ForEach(Att, 1, MaxAtt-1)
640  {
641  if ( DefUses[Att] )
642  {
643  AttDefUses[MaxAtt][++NUsed] = Att;
644  }
645  }
646  }
647 
648  Free(DefUses);
649 }
650 
651 
652 
653 /*************************************************************************/
654 /* */
655 /* Free up all space allocated by GetNames() */
656 /* */
657 /*************************************************************************/
658 
659 
660 void FreeNames()
661 /* --------- */
662 {
 /* Releases the name tables.  Does nothing when AttName is already Nil;
    AttName, ClassName and AttDef are reset to Nil once freed.

    NOTE(review): this copy comes from a documentation listing in which
    listing lines 674-676, 680 and 703 are absent.  On the lines shown,
    the AttValName, MaxAttVal, SpecialStatus and AttDefUses arrays
    themselves are never freed; the missing lines presumably do that.
    Restore them from the original source. */
663  Attribute a, t;
664 
665  if ( ! AttName ) return;
666 
 /* Value names of discrete attributes are freed from index 1; the class
    attribute is left out of this loop */
667  ForEach(a, 1, MaxAtt)
668  {
669  if ( a != ClassAtt && Discrete(a) )
670  {
671  FreeVector((void **) AttValName[a], 1, MaxAttVal[a]);
672  }
673  }
 /* NOTE(review): listing lines 674-676 are missing here */
677  FreeVector((void **) AttName, 1, MaxAtt); AttName = Nil;
678  FreeVector((void **) ClassName, 1, MaxClass); ClassName = Nil;
679 
 /* NOTE(review): listing line 680 is missing here */
681 
682  /* Definitions (if any) */
683 
684  if ( AttDef )
685  {
686  ForEach(a, 1, MaxAtt)
687  {
688  if ( AttDef[a] )
689  {
 /* Free the string operand of every OP_STR element up to the OP_END
    marker before releasing the definition itself */
690  for ( t = 0 ; DefOp(AttDef[a][t]) != OP_END ; t++ )
691  {
692  if ( DefOp(AttDef[a][t]) == OP_STR )
693  {
694  Free(DefSVal(AttDef[a][t]));
695  }
696  }
697 
698  Free(AttDef[a]);
699  Free(AttDefUses[a]);
700  }
701  }
702  Free(AttDef); AttDef = Nil;
 /* NOTE(review): listing line 703 is missing here */
704  }
705 }
706 
707 
708 
709 /*************************************************************************/
710 /* */
711 /* Read next char keeping track of line numbers */
712 /* */
713 /*************************************************************************/
714 
715 
716 int InChar(FILE *f)
717 /* ------ */
718 {
719  if ( ! *LBp )
720  {
721  LBp = LineBuffer;
722 
723  if ( ! fgets(LineBuffer, MAXLINEBUFFER, f) )
724  {
725  LineBuffer[0] = '\00';
726  return EOF;
727  }
728 
729  LineNo++;
730  }
731 
732  return (int) *LBp++;
733 }