mageec  0.1.0
MAchine Guided Energy Efficient Compilation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
rules.c
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Copyright 2010 Rulequest Research Pty Ltd. */
4 /* */
5 /* This file is part of C5.0 GPL Edition, a single-threaded version */
6 /* of C5.0 release 2.07. */
7 /* */
8 /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9 /* modify it under the terms of the GNU General Public License as */
10 /* published by the Free Software Foundation, either version 3 of the */
11 /* License, or (at your option) any later version. */
12 /* */
13 /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16 /* General Public License for more details. */
17 /* */
18 /* You should have received a copy of the GNU General Public License */
19 /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20 /* */
21 /* <http://www.gnu.org/licenses/>. */
22 /* */
23 /*************************************************************************/
24 
25 
26 
27 /*************************************************************************/
28 /* */
29 /* Miscellaneous routines for rule handling */
30 /* ---------------------------------------- */
31 /* */
32 /*************************************************************************/
33 
34 
35 #include "defns.i"
36 #include "extern.i"
37 
38 
39 /*************************************************************************/
40 /* */
41 /* Add a new rule to the current ruleset, by updating Rule[], */
42 /* NRules and, if necessary, RuleSpace */
43 /* */
44 /*************************************************************************/
45 
46 
48  Boolean *Deleted, CRule Existing,
49  CaseCount Cover, CaseCount Correct, float Prior)
50 /* ------- */
51 {
52  int d, dd, id, r, Size=0, Bytes;
53  CaseNo i;
54  CRule R;
55  Condition *Lhs;
56  Boolean Exclude=false;
57  int Vote;
58 
59  /* Sort and copy the conditions if required */
60 
61  if ( ! Existing )
62  {
63  ForEach(d, 1, NCond)
64  {
65  if ( ! Deleted[d] ) Size++;
66  }
67 
68  Lhs = Alloc(Size+1, Condition);
69 
70  /* Sort conditions in print order */
71 
72  ForEach(d, 1, Size)
73  {
74  dd = 0;
75  ForEach(id, 1, NCond)
76  {
77  if ( ! Deleted[id] && ( ! dd || Before(Cond[id], Cond[dd]) ) )
78  {
79  dd = id;
80  }
81  }
82 
83  Lhs[d] = Alloc(1, CondRec);
84  memcpy(Lhs[d], Cond[dd], sizeof(CondRec));
85  if ( Lhs[d]->NodeType == BrSubset )
86  {
87  Bytes = (MaxAttVal[Lhs[d]->Tested]>>3) + 1;
88  Lhs[d]->Subset = Alloc(Bytes, Byte);
89  memcpy(Lhs[d]->Subset, Cond[dd]->Subset, Bytes);
90  }
91 
92  Deleted[dd] = true;
93  }
94  }
95  else
96  {
97  Lhs = Cond;
98  Size = NCond;
99  }
100 
101  Vote = 1000 * (Correct + 1.0) / (Cover + 2.0) + 0.5;
102 
103  /* See if rule already exists */
104 
105  for ( r = 1 ; ! Exclude && r <= NRules ; r++ )
106  {
107  if ( SameRule(r, Lhs, Size, TargetClass) )
108  {
109  Verbosity(1, fprintf(Of, "\tduplicates rule %d\n", r))
110 
111  /* Keep the most optimistic error estimate */
112 
113  if ( Vote > Rule[r]->Vote )
114  {
115  Rule[r]->Vote = Vote;
116  }
117 
118  Exclude = true;
119  }
120  }
121 
122  if ( Exclude )
123  {
124  if ( ! Existing )
125  {
126  ForEach(d, 1, Size)
127  {
128  if ( Lhs[d]->NodeType == BrSubset ) Free(Lhs[d]->Subset);
129  }
130  FreeVector((void **) Lhs, 1, Size);
131  }
132 
133  return false;
134  }
135 
136  /* Make sure there is enough room for the new rule */
137 
138  NRules++;
139  if ( NRules >= RuleSpace )
140  {
141  RuleSpace += 100;
142  if ( RuleSpace > 100 )
143  {
146  ForEach(r, RuleSpace-100, RuleSpace-1)
147  {
148  Fires[r] = Nil;
149  }
150  }
151  else
152  {
155  }
156  }
157 
158  /* Form the new rule */
159 
160  Rule[NRules] = R = Alloc(1, RuleRec);
161 
162  R->TNo = ( Existing ? Existing->TNo : Trial );
163  R->RNo = ( Existing ? Existing->RNo : NRules );
164  R->Size = Size;
165  R->Lhs = Lhs;
166  R->Rhs = TargetClass;
167  R->Cover = Cover;
168  R->Correct = Correct;
169  R->Prior = Prior;
170  R->Vote = Vote;
171 
172  /* Record entry in Fires and CovBy */
173 
174  ListSort(List, 1, List[0]);
176 
177  ForEach(i, 1, List[0])
178  {
179  CovBy[List[i]]++;
180  }
181 
182  Verbosity(1, if ( ! Existing ) PrintRule(R))
183 
184  return true;
185 }
186 
187 
188 
189 /*************************************************************************/
190 /* */
191 /* Compress list of ascending integers. */
192 /* */
193 /* The first integer occupies 4 bytes. Each subsequent integer is */
194 /* represented as the increment on the previous and is encoded as */
195 /* one or more bytes b0 + b1 + .... where */
196 /* if byte b < 128, value is b */
197 /* if byte b = 128 + x, value is x * 128 */
198 /* */
199 /* For example, an increment 4321 (= 33 * 128 + 97) is encoded as */
200 /* two bytes [128 + 33] [97] */
201 /* */
202 /*************************************************************************/
203 
204 
205 Byte *Compress(int *L)
206 /* -------- */
207 {
208  int i, Last=0, Entry, Blocks;
209  Byte *p, *Compressed;
210 
211  /* Copy first integer (uncompressed) */
212 
213  memcpy(CBuffer, L, 4);
214  p = CBuffer + 4;
215 
216  ForEach(i, 1, L[0])
217  {
218  Entry = L[i] - Last;
219  Last = L[i];
220 
221  /* Place any necessary skip bytes */
222 
223  while ( Entry > 127 )
224  {
225  Blocks = (Entry >> 7);
226  if ( Blocks > 127 ) Blocks = 127;
227  Entry -= Blocks * 128;
228  *p++ = Blocks + 128;
229  }
230 
231  *p++ = Entry;
232  }
233 
234  Compressed = Alloc(p - CBuffer, Byte);
235  memcpy(Compressed, CBuffer, p - CBuffer);
236 
237  return Compressed;
238 }
239 
240 
241 
242 void Uncompress(Byte *CL, int *UCL)
243 /* ---------- */
244 {
245  int i, Entry=0;
246  Byte *p;
247 
248  memcpy(UCL, CL, 4);
249  p = CL + 4;
250 
251  ForEach(i, 1, UCL[0])
252  {
253  while ( (*p) & 128 )
254  {
255  Entry += ((*p++) & 127) * 128;
256  }
257 
258  Entry = UCL[i] = Entry + *p++;
259  }
260 }
261 
262 
263 
264 /*************************************************************************/
265 /* */
266 /* Sort list in preparation for Compress() */
267 /* */
268 /*************************************************************************/
269 
270 
271 void ListSort(int *L, int Fp, int Lp)
272 /* -------- */
273 {
274  int i, High, Middle, Thresh, Temp;
275 
276  if ( Fp < Lp )
277  {
278  Thresh = L[(Fp+Lp) / 2];
279 
280  /* Divide cases into three groups:
281  Fp .. Middle-1: values < Thresh
282  Middle .. High: values = Thresh
283  High+1 .. Lp: values > Thresh */
284 
285  for ( Middle = Fp ; L[Middle] < Thresh ; Middle++ )
286  ;
287 
288  for ( High = Lp ; L[High] > Thresh ; High-- )
289  ;
290 
291  for ( i = Middle ; i <= High ; )
292  {
293  if ( L[i] < Thresh )
294  {
295  Temp = L[Middle];
296  L[Middle] = L[i];
297  L[i] = Temp;
298  Middle++;
299  i++;
300  }
301  else
302  if ( L[i] > Thresh )
303  {
304  Temp = L[High];
305  L[High] = L[i];
306  L[i] = Temp;
307  High--;
308  }
309  else
310  {
311  i++;
312  }
313  }
314 
315  /* Sort the first and third groups */
316 
317  ListSort(L, Fp, Middle-1);
318  ListSort(L, High+1, Lp);
319  }
320 }
321 
322 
323 
324 /*************************************************************************/
325 /* */
326 /* Decide whether the given rule duplicates rule r */
327 /* */
328 /*************************************************************************/
329 
330 
332 /* -------- */
333 {
334  int d, i, Bytes;
335 
336  if ( Rule[r]->Size != NConds || Rule[r]->Rhs != TargetClass )
337  {
338  return false;
339  }
340 
341  ForEach(d, 1, NConds)
342  {
343  if ( Rule[r]->Lhs[d]->NodeType != Cond[d]->NodeType ||
344  Rule[r]->Lhs[d]->Tested != Cond[d]->Tested )
345  {
346  return false;
347  }
348 
349  switch ( Cond[d]->NodeType )
350  {
351  case BrDiscr:
352  if ( Rule[r]->Lhs[d]->TestValue != Cond[d]->TestValue )
353  {
354  return false;
355  }
356  break;
357 
358  case BrThresh:
359  if ( Rule[r]->Lhs[d]->TestValue != Cond[d]->TestValue ||
360  Rule[r]->Lhs[d]->Cut != Cond[d]->Cut )
361  {
362  return false;
363  }
364  break;
365 
366  case BrSubset:
367  Bytes = (MaxAttVal[Cond[d]->Tested]>>3) + 1;
368  ForEach(i, 0, Bytes-1)
369  {
370  if ( Rule[r]->Lhs[d]->Subset[i] != Cond[d]->Subset[i] )
371  {
372  return false;
373  }
374  }
375  }
376  }
377 
378  return true;
379 }
380 
381 
382 
383 /*************************************************************************/
384 /* */
385 /* Free space occupied by a rule and a ruleset */
386 /* */
387 /*************************************************************************/
388 
389 
391 /* -------- */
392 {
393  int d;
394 
395  ForEach(d, 1, R->Size)
396  {
397  if ( R->Lhs[d]->NodeType == BrSubset )
398  {
399  FreeUnlessNil(R->Lhs[d]->Subset);
400  }
401  FreeUnlessNil(R->Lhs[d]);
402  }
403  FreeUnlessNil(R->Lhs);
404  FreeUnlessNil(R);
405 }
406 
407 
408 
410 /* --------- */
411 {
412  int ri;
413 
414  ForEach(ri, 1, RS->SNRules)
415  {
416  FreeRule(RS->SRule[ri]);
417  }
418  Free(RS->SRule);
419  FreeRuleTree(RS->RT);
420  Free(RS);
421 }
422 
423 
424 
425 /*************************************************************************/
426 /* */
427 /* Print a ruleset */
428 /* */
429 /*************************************************************************/
430 
431 
433 /* ---------- */
434 {
435  int r;
436 
437  fprintf(Of, "\n%s\n", Msg);
438 
439  ForEach(r, 1, RS->SNRules)
440  {
441  PrintRule(RS->SRule[r]);
442  }
443 }
444 
445 
446 
447 /*************************************************************************/
448 /* */
449 /* Print rule R */
450 /* */
451 /*************************************************************************/
452 
453 
455 /* --------- */
456 {
457  int d;
458 
459  fprintf(Of, T_RuleHeader);
460  if ( TRIALS > 1 ) fprintf(Of, "%d/", R->TNo);
461  fprintf(Of, "%d: (%.8g", R->RNo, P1(R->Cover));
462  if ( R->Correct < R->Cover - 0.1 )
463  {
464  fprintf(Of, "/%.8g", P1(R->Cover - R->Correct));
465  }
466  fprintf(Of, T_RuleLift, ((R->Correct + 1) / (R->Cover + 2)) / R->Prior);
467 
468  ForEach(d, 1, R->Size)
469  {
470  PrintCondition(R->Lhs[d]);
471  }
472 
473  fprintf(Of, "\t-> " T_class " %s [%.3f]\n",
474  ClassName[R->Rhs], R->Vote/1000.0);
475 }
476 
477 
478 
479 /*************************************************************************/
480 /* */
481 /* Print a condition C of a rule */
482 /* */
483 /*************************************************************************/
484 
485 
487 /* -------------- */
488 {
489  DiscrValue v, pv, Last, Values;
490  Boolean First=true;
491  Attribute Att;
492  int Col, Base, Entry;
493  char CVS[20];
494 
495  v = C->TestValue;
496  Att = C->Tested;
497 
498  fprintf(Of, "\t%s", AttName[Att]);
499 
500  if ( v < 0 )
501  {
502  fprintf(Of, T_IsUnknown);
503  return;
504  }
505 
506  switch ( C->NodeType )
507  {
508  case BrDiscr:
509  fprintf(Of, " = %s\n", AttValName[Att][v]);
510  break;
511 
512  case BrThresh:
513  if ( v == 1 )
514  {
515  fprintf(Of, " = N/A\n");
516  }
517  else
518  {
519  CValToStr(C->Cut, Att, CVS);
520  fprintf(Of, " %s %s\n", ( v == 2 ? "<=" : ">" ), CVS);
521  }
522  break;
523 
524  case BrSubset:
525  /* Count values at this branch */
526 
527  Values = Elements(Att, C->Subset, &Last);
528  if ( Values == 1 )
529  {
530  fprintf(Of, " = %s\n", AttValName[Att][Last]);
531  break;
532  }
533 
534  if ( Ordered(Att) )
535  {
536  /* Find first value */
537 
538  for ( pv = 1 ; ! In(pv, C->Subset) ; pv++ )
539  ;
540 
541  fprintf(Of, " %s [%s-%s]\n", T_InRange,
542  AttValName[Att][pv], AttValName[Att][Last]);
543  break;
544  }
545 
546  /* Must keep track of position to break long lines */
547 
548  fprintf(Of, " %s {", T_ElementOf);
549  Col = Base = CharWidth(AttName[Att]) + CharWidth(T_ElementOf) + 11;
550 
551  ForEach(pv, 1, MaxAttVal[Att])
552  {
553  if ( In(pv, C->Subset) )
554  {
555  Entry = CharWidth(AttValName[Att][pv]);
556 
557  if ( First )
558  {
559  First = false;
560  }
561  else
562  if ( Col + Entry + 2 >= Width )
563  {
564  Col = Base;
565  fprintf(Of, ",\n%*s", Col, "");
566  }
567  else
568  {
569  fprintf(Of, ", ");
570  Col += 2;
571  }
572 
573  fprintf(Of, "%s", AttValName[Att][pv]);
574  Col += Entry;
575  }
576  }
577  fprintf(Of, "}\n");
578  }
579 }