mageec  0.1.0
MAchine Guided Energy Efficient Compilation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
utility.c
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Copyright 2010 Rulequest Research Pty Ltd. */
4 /* */
5 /* This file is part of C5.0 GPL Edition, a single-threaded version */
6 /* of C5.0 release 2.07. */
7 /* */
8 /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9 /* modify it under the terms of the GNU General Public License as */
10 /* published by the Free Software Foundation, either version 3 of the */
11 /* License, or (at your option) any later version. */
12 /* */
13 /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16 /* General Public License for more details. */
17 /* */
18 /* You should have received a copy of the GNU General Public License */
19 /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20 /* */
21 /* <http://www.gnu.org/licenses/>. */
22 /* */
23 /*************************************************************************/
24 
25 
26 
27 /*************************************************************************/
28 /* */
29 /* Print header for all C5.0 programs */
30 /* ---------------------------------- */
31 /* */
32 /*************************************************************************/
33 
34 #include "defns.i"
35 #include "extern.i"
36 
37 
38 #define NAME T_C50
39 
40 
41 void PrintHeader(String Title)
42 /* ----------- */
43 {
44  char TitleLine[80];
45  time_t clock;
46  int Underline;
47 
48  clock = time(0);
49  sprintf(TitleLine, "%s%s [%s]", NAME, Title, TX_Release(RELEASE));
50  fprintf(Of, "\n%s \t%s", TitleLine, ctime(&clock));
51 
52  Underline = CharWidth(TitleLine);
53  while ( Underline-- ) putc('-', Of);
54  putc('\n', Of);
55 }
56 
57 
58 
59 /*************************************************************************/
60 /* */
61 /* This is a specialised form of the getopt utility. */
62 /* */
63 /*************************************************************************/
64 
65 
67 
68 
69 char ProcessOption(int Argc, char *Argv[], char *Options)
70 /* ------------- */
71 {
72  int i;
73  static int OptNo=1;
74 
75  if ( OptNo >= Argc ) return '\00';
76 
77  if ( *(Option = Argv[OptNo++]) != '-' ) return '?';
78 
79  for ( i = 0 ; Options[i] ; i++ )
80  {
81  if ( Options[i] == Option[1] )
82  {
83  OptArg = (char *) ( Options[i+1] != '+' ? Nil :
84  Option[2] ? Option+2 :
85  OptNo < Argc ? Argv[OptNo++] : "0" );
86  return Option[1];
87  }
88  }
89 
90  return '?';
91 }
92 
93 
94 
95 /*************************************************************************/
96 /* */
97 /* Protected memory allocation routines */
98 /* */
99 /*************************************************************************/
100 
101 
102 
103 void *Pmalloc(size_t Bytes)
104 /* ------- */
105 {
106  void *p=Nil;
107 
108  if ( ! Bytes || (p = (void *) malloc(Bytes)) )
109  {
110  return p;
111  }
112 
113  Error(NOMEM, "", "");
114 
115 }
116 
117 
118 
119 void *Prealloc(void *Present, size_t Bytes)
120 /* -------- */
121 {
122  void *p=Nil;
123 
124  if ( ! Bytes ) return Nil;
125 
126  if ( ! Present ) return Pmalloc(Bytes);
127 
128  if ( (p = (void *) realloc(Present, Bytes)) )
129  {
130  return p;
131  }
132 
133  Error(NOMEM, "", "");
134 
135 }
136 
137 
138 
139 void *Pcalloc(size_t Number, unsigned int Size)
140 /* ------- */
141 {
142  void *p=Nil;
143 
144  if ( ! Number || (p = (void *) calloc(Number, Size)) )
145  {
146  return p;
147  }
148 
149  Error(NOMEM, "", "");
150 
151 }
152 
153 
154 
155 void FreeVector(void **V, int First, int Last)
156 /* ---------- */
157 {
158  if ( V )
159  {
160  while ( First <= Last )
161  {
162  FreeUnlessNil(V[First]);
163  First++;
164  }
165 
166  Free(V);
167  }
168 }
169 
170 
171 
172 /*************************************************************************/
173 /* */
174 /* Special memory allocation routines for case memory */
175 /* */
176 /*************************************************************************/
177 
178 typedef struct _datablockrec *DataBlock;
179 
180 typedef struct _datablockrec
181  {
182  DataRec Head; /* first address */
183  int Allocated; /* number of cases in this block */
184  DataBlock Prev; /* previous data block */
185  }
186  DataBlockRec;
187 
188 DataBlock DataMem=Nil;
190 
191 
192 
194 /* ------- */
195 {
196  DataBlock Prev;
197 
198  if ( ! DataMem || DataMem->Allocated == DataBlockSize )
199  {
200  DataBlockSize = Min(8192, 262144 / (MaxAtt+2) + 1);
201 
202  Prev = DataMem;
203  DataMem = AllocZero(1, DataBlockRec);
204  DataMem->Head = Alloc(DataBlockSize * (MaxAtt+2), AttValue);
205  DataMem->Prev = Prev;
206  }
207 
208  return DataMem->Head + (DataMem->Allocated++) * (MaxAtt+2) + 1;
209 }
210 
211 
212 
213 void FreeCases()
214 /* --------- */
215 {
216  DataBlock Prev;
217 
218  while ( DataMem )
219  {
220  Prev = DataMem->Prev;
221  Free(DataMem->Head);
222  Free(DataMem);
223  DataMem = Prev;
224  }
225 }
226 
227 
228 
230 /* ------------ */
231 {
232  DataMem->Allocated--;
233 }
234 
235 
236 
237 /*************************************************************************/
238 /* */
239 /* Generate uniform random numbers */
240 /* */
241 /*************************************************************************/
242 
243 
244 #define Modify(F,S) if ( (F -= S) < 0 ) F += 1.0
245 
246 int KRFp=0, KRSp=0;
247 
248 double KRandom()
249 /* ------- */
250 {
251  static double URD[55];
252  double V1, V2;
253  int i, j;
254 
255  /* Initialisation */
256 
257  if ( KRFp == KRSp )
258  {
259  KRFp = 0;
260  KRSp = 31;
261 
262  V1 = 1.0;
263  V2 = 0.314159285;
264 
265  ForEach(i, 1, 55)
266  {
267  URD[ j = (i * 21) % 55 ] = V1;
268  V1 = V2 - V1;
269  if ( V1 < 0 ) V1 += 1.0;
270  V2 = URD[j];
271  }
272 
273  ForEach(j, 0, 5)
274  {
275  ForEach(i, 0, 54)
276  {
277  Modify(URD[i], URD[(i+30) % 55]);
278  }
279  }
280  }
281 
282  KRFp = (KRFp + 1) % 55;
283  KRSp = (KRSp + 1) % 55;
284  Modify(URD[KRFp], URD[KRSp]);
285 
286  return URD[KRFp];
287 }
288 
289 
290 
291 void ResetKR(int KRInit)
292 /* ------- */
293 {
294  KRFp = KRSp = 0;
295 
296  KRInit += 1000;
297  while ( KRInit-- )
298  {
299  KRandom();
300  }
301 }
302 
303 
304 
305 /*************************************************************************/
306 /* */
307 /* Error messages */
308 /* */
309 /*************************************************************************/
310 
311 
312 void Error(int ErrNo, String S1, String S2)
313 /* ----- */
314 {
315  Boolean Quit=false, WarningOnly=false;
316  char Buffer[10000], *Msg=Buffer;
317 
318 
319  if ( Of ) fprintf(Of, "\n");
320 
321  if ( ErrNo == NOFILE || ErrNo == NOMEM || ErrNo == MODELFILE )
322  {
323  sprintf(Msg, "*** ");
324  }
325  else
326  {
327  sprintf(Msg, TX_Line(LineNo, Fn));
328  }
329  Msg += strlen(Buffer);
330 
331  switch ( ErrNo )
332  {
333  case NOFILE:
334  sprintf(Msg, E_NOFILE(Fn, S2));
335  Quit = true;
336  break;
337 
338  case BADCLASSTHRESH:
339  sprintf(Msg, E_BADCLASSTHRESH, S1);
340  break;
341 
342  case LEQCLASSTHRESH:
343  sprintf(Msg, E_LEQCLASSTHRESH, S1);
344  break;
345 
346  case BADATTNAME:
347  sprintf(Msg, E_BADATTNAME, S1);
348  break;
349 
350  case EOFINATT:
351  sprintf(Msg, E_EOFINATT, S1);
352  break;
353 
354  case SINGLEATTVAL:
355  sprintf(Msg, E_SINGLEATTVAL(S1, S2));
356  break;
357 
358  case DUPATTNAME:
359  sprintf(Msg, E_DUPATTNAME, S1);
360  break;
361 
362  case CWTATTERR:
363  sprintf(Msg, E_CWTATTERR);
364  break;
365 
366  case BADATTVAL:
367  sprintf(Msg, E_BADATTVAL(S2, S1));
368  break;
369 
370  case BADNUMBER:
371  sprintf(Msg, E_BADNUMBER(S1));
372  break;
373 
374  case BADCLASS:
375  sprintf(Msg, E_BADCLASS, S2);
376  break;
377 
378  case BADCOSTCLASS:
379  sprintf(Msg, E_BADCOSTCLASS, S1);
380  Quit = true;
381  break;
382 
383  case BADCOST:
384  sprintf(Msg, E_BADCOST, S1);
385  Quit = true;
386  break;
387 
388  case NOMEM:
389  sprintf(Msg, E_NOMEM);
390  Quit = true;
391  break;
392 
393  case TOOMANYVALS:
394  sprintf(Msg, E_TOOMANYVALS(S1, (int) (long) S2));
395  break;
396 
397  case BADDISCRETE:
398  sprintf(Msg, E_BADDISCRETE, S1);
399  break;
400 
401  case NOTARGET:
402  sprintf(Msg, E_NOTARGET, S1);
403  Quit = true;
404  break;
405 
406  case BADCTARGET:
407  sprintf(Msg, E_BADCTARGET, S1);
408  Quit = true;
409  break;
410 
411  case BADDTARGET:
412  sprintf(Msg, E_BADDTARGET, S1);
413  Quit = true;
414  break;
415 
416  case LONGNAME:
417  sprintf(Msg, E_LONGNAME);
418  Quit = true;
419  break;
420 
421  case HITEOF:
422  sprintf(Msg, E_HITEOF);
423  break;
424 
425  case MISSNAME:
426  sprintf(Msg, E_MISSNAME, S2);
427  break;
428 
429  case BADTSTMP:
430  sprintf(Msg, E_BADTSTMP(S2, S1));
431  break;
432 
433  case BADDATE:
434  sprintf(Msg, E_BADDATE(S2, S1));
435  break;
436 
437  case BADTIME:
438  sprintf(Msg, E_BADTIME(S2, S1));
439  break;
440 
441  case UNKNOWNATT:
442  sprintf(Msg, E_UNKNOWNATT, S1);
443  break;
444 
445  case BADDEF1:
446  sprintf(Msg, E_BADDEF1(AttName[MaxAtt], S1, S2));
447  break;
448 
449  case BADDEF2:
450  sprintf(Msg, E_BADDEF2(AttName[MaxAtt], S1, S2));
451  break;
452 
453  case SAMEATT:
454  sprintf(Msg, E_SAMEATT(AttName[MaxAtt], S1));
455  WarningOnly = true;
456  break;
457 
458  case BADDEF3:
459  sprintf(Msg, E_BADDEF3, AttName[MaxAtt]);
460  break;
461 
462  case BADDEF4:
463  sprintf(Msg, E_BADDEF4, AttName[MaxAtt]);
464  WarningOnly = true;
465  break;
466 
467  case MODELFILE:
468  sprintf(Msg, EX_MODELFILE(Fn));
469  sprintf(Msg, " (%s `%s')\n", S1, S2);
470  Quit = true;
471  break;
472  }
473 
474  fprintf(Of, Buffer);
475 
476  if ( ! WarningOnly ) ErrMsgs++;
477 
478  if ( ErrMsgs == 10 )
479  {
480  fprintf(Of, T_ErrorLimit);
481  MaxCase--;
482  Quit = true;
483  }
484 
485  if ( Quit && Of )
486  {
487  Goodbye(1);
488  }
489 }
490 
491 
492 
493 /*************************************************************************/
494 /* */
495 /* Generate the label for a case */
496 /* */
497 /*************************************************************************/
498 
499 char LabelBuffer[1000];
500 
501 
503 /* --------- */
504 {
505  String p;
506 
507  if ( LabelAtt && (p = IgnoredVals + SVal(Case[N], LabelAtt)) )
508  ;
509  else
510  {
511  sprintf(LabelBuffer, "#%d", N+1);
512  p = LabelBuffer;
513  }
514 
515  return p;
516 }
517 
518 
519 
520 /*************************************************************************/
521 /* */
522 /* Open file with given extension for read/write */
523 /* */
524 /*************************************************************************/
525 
526 
527 FILE *GetFile(String Extension, String RW)
528 /* -------- */
529 {
530  strcpy(Fn, FileStem);
531  strcat(Fn, Extension);
532  return fopen(Fn, RW);
533 }
534 
535 
536 
537 /*************************************************************************/
538 /* */
539 /* Determine total elapsed time so far. */
540 /* */
541 /*************************************************************************/
542 
543 
544 #include <sys/time.h>
545 
546 double ExecTime()
547 /* -------- */
548 {
549  struct timeval TV;
550  struct timezone TZ={0,0};
551 
552  gettimeofday(&TV, &TZ);
553  return TV.tv_sec + TV.tv_usec / 1000000.0;
554 }
555 
556 
557 
558 /*************************************************************************/
559 /* */
560 /* Determine precision of floating value */
561 /* */
562 /*************************************************************************/
563 
564 
566 /* ----------- */
567 {
568  double RoundErr, Accuracy;
569  int Mult;
570 
571  Accuracy = fabs(Val) * 1E-6; /* approximate */
572  Val = modf(Val, &RoundErr);
573 
574  for ( Mult = 100000 ; Mult >= 1 ; Mult /= 10 )
575  {
576  RoundErr = fabs(rint(Val * Mult) / Mult - Val);
577  if ( RoundErr > 2 * Accuracy )
578  {
579  return Mult * 10;
580  }
581  }
582 
583  return 1;
584 }
585 
586 
587 
588 /*************************************************************************/
589 /* */
590 /* Routines to process date (Algorithm due to Gauss?) */
591 /* */
592 /*************************************************************************/
593 
594 
595 int GetInt(String S, int N)
596 /* ------ */
597 {
598  int Result=0;
599 
600  while ( N-- )
601  {
602  if ( ! isdigit(*S) ) return 0;
603 
604  Result = Result * 10 + (*S++ - '0');
605  }
606 
607  return Result;
608 }
609 
610 
611 int DateToDay(String DS) /* Day 1 is 0000/03/01 */
612 /* --------- */
613 {
614  int Year, Month, Day;
615 
616  if ( strlen(DS) != 10 ) return 0;
617 
618  Year = GetInt(DS, 4);
619  Month = GetInt(DS+5, 2);
620  Day = GetInt(DS+8, 2);
621 
622  if ( ! ( DS[4] == '/' && DS[7] == '/' || DS[4] == '-' && DS[7] == '-' ) ||
623  Year < 0 || Month < 1 || Day < 1 ||
624  Month > 12 ||
625  Day > 31 ||
626  Day > 30 &&
627  ( Month == 4 || Month == 6 || Month == 9 || Month == 11 ) ||
628  Month == 2 &&
629  ( Day > 29 ||
630  Day > 28 && ( Year % 4 != 0 ||
631  Year % 100 == 0 && Year % 400 != 0 ) ) )
632  {
633  return 0;
634  }
635 
636  if ( (Month -= 2) <= 0 )
637  {
638  Month += 12;
639  Year -= 1;
640  }
641 
642  return Year * 365 + Year / 4 - Year / 100 + Year / 400
643  + 367 * Month / 12
644  + Day - 30;
645 }
646 
647 
648 
649 void DayToDate(int Day, String Date)
650 /* --------- */
651 {
652  int Year, Month, OrigDay=Day;
653 
654  if ( Day <= 0 )
655  {
656  strcpy(Date, "?");
657  return;
658  }
659 
660  Year = (Day - 1) / 365.2425L; /* Year = completed years */
661  Day -= Year * 365 + Year / 4 - Year / 100 + Year / 400;
662 
663  if ( Day < 1 )
664  {
665  Year--;
666  Day = OrigDay - (Year * 365 + Year / 4 - Year / 100 + Year / 400);
667  }
668  else
669  if ( Day > 366 ||
670  Day == 366 &&
671  ( (Year+1) % 4 != 0 || (Year+1) % 100 == 0 && (Year+1) % 400 != 0 ) )
672  {
673  Year++;
674  Day = OrigDay - (Year * 365 + Year / 4 - Year / 100 + Year / 400);
675  }
676 
677  Month = (Day + 30) * 12 / 367;
678  Day -= 367 * Month / 12 - 30;
679  if ( Day < 1 )
680  {
681  Month = 11;
682  Day = 31;
683  }
684 
685  Month += 2;
686  if ( Month > 12 )
687  {
688  Month -= 12;
689  Year++;
690  }
691 
692  sprintf(Date, "%d/%d%d/%d%d", Year, Month/10, Month % 10, Day/10, Day % 10);
693 }
694 
695 
696 
697 /*************************************************************************/
698 /* */
699 /* Routines to process clock time and timestamps */
700 /* */
701 /*************************************************************************/
702 
703 
705 /* ---------- */
706 {
707  int Hour, Mins, Secs;
708 
709  if ( strlen(TS) != 8 ) return -1;
710 
711  Hour = GetInt(TS, 2);
712  Mins = GetInt(TS+3, 2);
713  Secs = GetInt(TS+6, 2);
714 
715  if ( TS[2] != ':' || TS[5] != ':' ||
716  Hour >= 24 || Mins >= 60 || Secs >= 60 )
717  {
718  return -1;
719  }
720 
721  return Hour * 3600 + Mins * 60 + Secs;
722 }
723 
724 
725 
726 void SecsToTime(int Secs, String Time)
727 /* ---------- */
728 {
729  int Hour, Mins;
730 
731  Hour = Secs / 3600;
732  Mins = (Secs % 3600) / 60;
733  Secs = Secs % 60;
734 
735  sprintf(Time, "%d%d:%d%d:%d%d",
736  Hour / 10, Hour % 10,
737  Mins / 10, Mins % 10,
738  Secs / 10, Secs % 10);
739 }
740 
741 
742 
743 void SetTSBase(int y)
744 /* --------- */
745 {
746  y -= 15;
747  TSBase = y * 365 + y / 4 - y / 100 + y / 400 + (367 * 4) / 12 + 1 - 30;
748 }
749 
750 
751 
753 /* ------------ */
754 {
755  int Day, Sec, i;
756 
757  /* Check for reasonable length and space between date and time */
758 
759  if ( strlen(TS) < 19 || ! Space(TS[10]) ) return (1 << 30);
760 
761  /* Read date part */
762 
763  TS[10] = '\00';
764  Day = DateToDay(TS);
765  TS[10] = ' ';
766 
767  /* Skip one or more spaces */
768 
769  for ( i = 11 ; TS[i] && Space(TS[i]) ; i++ )
770  ;
771 
772  /* Read time part */
773 
774  Sec = TimeToSecs(TS+i);
775 
776  /* Return a long time in the future if there is an error */
777 
778  return ( Day < 1 || Sec < 0 ? (1 << 30) :
779  (Day - TSBase) * 1440 + (Sec + 30) / 60 );
780 }
781 
782 
783 
784 /*************************************************************************/
785 /* */
786 /* Convert a continuous value to a string. DS must be */
787 /* large enough to hold any value (e.g. a date, time, ...) */
788 /* */
789 /*************************************************************************/
790 
791 
793 /* --------- */
794 {
795  int Mins;
796 
797  if ( TStampVal(Att) )
798  {
799  DayToDate(floor(CV / 1440) + TSBase, DS);
800  DS[10] = ' ';
801  Mins = rint(CV) - floor(CV / 1440) * 1440;
802  SecsToTime(Mins * 60, DS+11);
803  }
804  else
805  if ( DateVal(Att) )
806  {
807  DayToDate(CV, DS);
808  }
809  else
810  if ( TimeVal(Att) )
811  {
812  SecsToTime(CV, DS);
813  }
814  else
815  {
816  sprintf(DS, "%.*g", PREC, CV);
817  }
818 }
819 
820 
821 
822 /*************************************************************************/
823 /* */
824 /* Check parameter value */
825 /* */
826 /*************************************************************************/
827 
828 
829 void Check(float Val, float Low, float High)
830 /* ----- */
831 {
832  if ( Val < Low || Val > High )
833  {
834  fprintf(Of, TX_IllegalValue(Val, Low, High));
835  exit(1);
836  }
837 }
838 
839 
840 
841 
842 
843 /*************************************************************************/
844 /* */
845 /* Deallocate all dynamic storage */
846 /* */
847 /*************************************************************************/
848 
849 
850 void Cleanup()
851 /* ------- */
852 {
853  int t, r;
854 
855  extern DataRec *Blocked;
856  extern Tree *SubDef;
857  extern int SubSpace, ActiveSpace, PropValSize;
858  extern RuleNo *Active;
859  extern float *AttImp;
860  extern char *PropVal;
861  extern Boolean *Split, *Used;
862  extern FILE *Uf;
863 
864  NotifyStage(CLEANUP);
865 
866  CheckClose(Uf); Uf = Nil;
867  CheckClose(TRf); TRf = Nil;
868 
869  /* Boost voting (construct.c) */
870 
872 
873  /* Stuff from attribute winnowing */
874 
876  FreeUnlessNil(AttImp); AttImp = Nil;
877  FreeUnlessNil(Split); Split = Nil;
878  FreeUnlessNil(Used); Used = Nil;
879 
880  FreeUnlessNil(PropVal); PropVal = Nil;
881  PropValSize = 0;
882 
883  if ( RULES )
884  {
887  }
888 
889  /* May have interrupted a winnowing tree */
890 
891  if ( WINNOW && WTree )
892  {
893  FreeTree(WTree); WTree = Nil;
894  }
895 
896  FreeUnlessNil(Blocked); Blocked = Nil;
897 
898  FreeData();
899 
900  if ( MCost )
901  {
902  FreeVector((void **) MCost, 1, MaxClass); MCost = Nil;
904  }
905 
906  ForEach(t, 0, MaxTree)
907  {
908  FreeClassifier(t);
909  }
910 
911  if ( RULES )
912  {
913  /* May be incomplete ruleset in Rule[] */
914 
915  if ( Rule )
916  {
917  ForEach(r, 1, NRules)
918  {
919  FreeRule(Rule[r]);
920  }
921  Free(Rule); Rule = Nil;
922  }
923 
927  }
928 
929  FreeTreeData();
930 
931  FreeUnlessNil(Active); Active = Nil;
932  ActiveSpace = 0;
933 
937 
940 
941  FreeNames();
942 
943  FreeUnlessNil(SubDef); SubDef = Nil;
944  SubSpace = 0;
945  MaxCase = -1;
946 
947  NotifyStage(0);
948 }
949 
950 
951 
952 #ifdef UTF8
953 // //
955 // Routines for Unicode/UTF-8 processing //
956 // ------------------------------------- //
957 // //
959 
960 #include <wchar.h>
961 
962 
963 
964 /*************************************************************************/
965 /* */
966 /* Determine the total character width of a UTF-8 string */
967 /* */
968 /*************************************************************************/
969 
970 
971 int UTF8CharWidth(unsigned char *U)
972 /* ------------- */
973 {
974  int CWidth=0, Mask, This;
975  wchar_t Unicode;
976 
977  while ( *U )
978  {
979  Unicode = *U;
980 
981  if ( *U < 0x7F )
982  {
983  /* ASCII character */
984 
985  CWidth++;
986  U++;
987  }
988  else
989  {
990  /* Discard header bits */
991 
992  Mask = 0x80;
993  while ( Unicode & Mask )
994  {
995  Unicode ^= Mask;
996  Mask = Mask >> 1;
997  }
998 
999  while ( ((*(++U)) & 0xc0) == 0x80 )
1000  {
1001  Unicode = (Unicode << 6) | (*U & 0x3f);
1002  }
1003 
1004  if ( (This = wcwidth(Unicode)) > 0 ) CWidth += This;
1005  }
1006  }
1007 
1008  return CWidth;
1009 }
1010 
1011 
1012 
1014 // Public domain code to determine the width of a Unicode character //
1016 
1017 
1018 /*
1019  * This is an implementation of wcwidth() and wcswidth() as defined in
1020  * "The Single UNIX Specification, Version 2, The Open Group, 1997"
1021  * <http://www.UNIX-systems.org/online.html>
1022  *
1023  * Markus Kuhn -- 2000-02-08 -- public domain
1024  */
1025 
1026 //#include <wchar.h>
1027 
1028 /* These functions define the column width of an ISO 10646 character
1029  * as follows:
1030  *
1031  * - The null character (U+0000) has a column width of 0.
1032  *
1033  * - Other C0/C1 control characters and DEL will lead to a return
1034  * value of -1.
1035  *
1036  * - Non-spacing and enclosing combining characters (general
1037  * category code Mn or Me in the Unicode database) have a
1038  * column width of 0.
1039  *
1040  * - Spacing characters in the East Asian Wide (W) or East Asian
1041  * FullWidth (F) category as defined in Unicode Technical
1042  * Report #11 have a column width of 2.
1043  *
1044  * - All remaining characters (including all printable
1045  * ISO 8859-1 and WGL4 characters, Unicode control characters,
1046  * etc.) have a column width of 1.
1047  *
1048  * This implementation assumes that wchar_t characters are encoded
1049  * in ISO 10646.
1050  */
1051 
1052 int wcwidth(wchar_t ucs)
1053 {
1054  /* sorted list of non-overlapping intervals of non-spacing characters */
1055  static const struct interval {
1056  unsigned short first;
1057  unsigned short last;
1058  } combining[] = {
1059  { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
1060  { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
1061  { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
1062  { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
1063  { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
1064  { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
1065  { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
1066  { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
1067  { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
1068  { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
1069  { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
1070  { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
1071  { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
1072  { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
1073  { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
1074  { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
1075  { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
1076  { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
1077  { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
1078  { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
1079  { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
1080  { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
1081  { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
1082  { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
1083  { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
1084  { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
1085  { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
1086  { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
1087  { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
1088  { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
1089  { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
1090  };
1091  int min = 0;
1092  int max = sizeof(combining) / sizeof(struct interval) - 1;
1093  int mid;
1094 
1095  /* test for 8-bit control characters */
1096  if (ucs == 0)
1097  return 0;
1098  if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
1099  return -1;
1100 
1101  /* first quick check for Latin-1 etc. characters */
1102  if (ucs < combining[0].first)
1103  return 1;
1104 
1105  /* binary search in table of non-spacing characters */
1106  while (max >= min) {
1107  mid = (min + max) / 2;
1108  if (combining[mid].last < ucs)
1109  min = mid + 1;
1110  else if (combining[mid].first > ucs)
1111  max = mid - 1;
1112  else if (combining[mid].first <= ucs && combining[mid].last >= ucs)
1113  return 0;
1114  }
1115 
1116  /* if we arrive here, ucs is not a combining or C0/C1 control character */
1117 
1118  /* fast test for majority of non-wide scripts */
1119  if (ucs < 0x1100)
1120  return 1;
1121 
1122  return 1 +
1123  ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
1124  (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
1125  ucs != 0x303f) || /* CJK ... Yi */
1126  (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
1127  (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
1128  (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
1129  (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
1130  (ucs >= 0xffe0 && ucs <= 0xffe6));
1131 }
1132 
1133 
1134 int wcswidth(const wchar_t *pwcs, size_t n)
1135 {
1136  int w, width = 0;
1137 
1138  for (;*pwcs && n-- > 0; pwcs++)
1139  if ((w = wcwidth(*pwcs)) < 0)
1140  return -1;
1141  else
1142  width += w;
1143 
1144  return width;
1145 }
1146 #endif