src/datelex.c

   1 /*$Log: datelex.c,v $
   2  *Revision 1.2  2002-09-28 06:58:45  arjen
   3  *Bugfix: conversion of an empty string to a date or hour object
   4  *now makes the values of such an object 0 (null) instead of giving
   5  *a segmentation fault.
   6  *The class UTC combines the date and hour classes. The most basic
   7  *functions of the UTC class are now implemented.
   8  *These include constructors and conversion to and from String objects.
   9  *New functions: date::proper(), hour::proper() and UTC::proper().
  10  *Return true if the object holds a proper clock time and/or calendar
  11  *date; false if at least one value is out of range.
  12  *
  13  *Revision 1.1  2002/07/25 08:01:26  arjen
  14  *First checkin, AXE release 0.2
  15  *
  16  * Revision 1.1  84/09/01  15:01:14  wales
  17  * Initial revision
  18  *
  19  * Copyright (c) 1984 by Richard B. Wales
  20  *
  21  * Purpose:
  22  *
  23  *     Lexical analyzer for "parsedate" routine.  This lexer was orig-
  24  *     inally written in LEX, but rewriting it as an ad-hoc routine
  25  *     resulted in an enormous savings in space and a significant
  26  *     increase in speed.
  27  *
  28  * Usage:
  29  *
  30  *     Called as needed by the YACC parser ("dateyacc.c").  Not intended
  31  *     to be called from any other routine.
  32  *
  33  * Notes:
  34  *
  35  * Global contents:
  36  *
  37  *     int yylex ()
  38  *         Returns the token number (from the YACC grammar) of the next
  39  *         token in the input string pointed to by the global variable
  40  *         "yyinbuf".  The global variable "yylval" is set to the lexi-
  41  *         cal value (if any) of the token.  "yyinbuf" is set to point
  42  *         to the first character in the input string which is not a
  43  *         part of the token just recognized.
  44  *
  45  * Local contents:
  46  *
  47  *     struct wordtable *find_word (word) char *word;
  48  *         Returns a pointer to the entry in the "wordtable" array cor-
  49  *         responding to the string "word".  If "word" is not found, the
  50  *         returned value is NULL.
  51  */
  52
  53 /* ajs
  54  * ajs  Code added 850314 to allow NUM991231 and NUM99991231.
  55  * ajs  All added/changed lines contain "ajs" for easy searching.
  56  * ajs  */
  57
  58 /* AJB, Aug 28 1999:  Added month names in Dutch  */
  59
   60 #ifdef RCSIDENT /* define the RCS "$Header$" identification string only when RCSIDENT is defined */
   61 static char rcsident[] = "$Header: /cvsroot/lib/AXE/src/datelex.c,v 1.2 2002-09-28 06:58:45 arjen Exp $";
   62 #endif /* RCSIDENT */
  63
  64 #include <stdio.h>
  65 #include <string.h>
  66 #include "dateyacc.h"
  67 #include "parsedate.h"
  68
   69 /* Pointer to the not-yet-scanned part of the input string.  yylex() starts
       * reading here and leaves it just past the token it consumed (or on the
       * terminating null byte once the end of the input is reached). */
   70 char *yyinbuf;
   71
   72 /* "Answer" structure.  The only member touched in this file is "error":
       * when yylex() meets input it cannot tokenize and "error" is still NULL,
       * it stores the current input position (just past the offending text). */
   73 struct parseddate yyans;
  74
  75 /* Binary-search word table.
  76  * Entries must be sorted in ascending order on "text" value, and the
  77  * total number of entries must be one less than a power of 2.  "Filler"
  78  * entries (with "token" values of -1) are inserted at the beginning and
  79  * end of the table to pad it as necessary.
  80  */
  81 #define WORDTABLE_SIZE 127      /* MUST be one less than power of 2 */
  82 #define MAX_WORD_LENGTH 20      /* used to weed out overly long words
  83                                  * in "yylex".  Must be at least as long
  84                                  * as the longest word in "wordtable",
  85                                  * but may be longer.
  86                                  */
  87 struct wordtable
  88     {   char *text;
  89         int   token;
  90         int   lexval;
  91     } wordtable[WORDTABLE_SIZE] =
  92     {/* text            token           lexval */
  93         "",             -1,             0,
  94         "",             -1,             0,
  95         "",             -1,             0,
  96         "",             -1,             0,
  97         "",             -1,             0,
  98         "",             -1,             0,
  99         "",             -1,             0,
 100         "",             -1,             0,
 101         "",             -1,             0,
 102         "",             -1,             0,
 103         "",             -1,             0,
 104         "A",            STD_ZONE,       60,     /* UTC+1h */
 105         "ACSST",        DST_ZONE,       630,    /* Cent. Australia */
 106         "ACST",         STD_ZONE,       570,    /* Cent. Australia */
 107         "ADT",          DST_ZONE,       -180,   /* Atlantic (Canada) */
 108         "AESST",        DST_ZONE,       660,    /* E. Australia */
 109         "AEST",         STD_ZONE,       600,    /* E. Australia */
 110         "AM",           AMPM,           0,
 111         "APR",          MONTH_NAME,     4,
 112         "APRIL",        MONTH_NAME,     4,
 113         "AST",          STD_ZONE,       -240,   /* Atlantic (Canada) */
 114         "AT",           0,              0,      /* "at" (throwaway) */
 115         "AUG",          MONTH_NAME,     8,
 116         "AUGUST",       MONTH_NAME,     8,
 117         "AWSST",        DST_ZONE,       540,    /* W. Australia */
 118         "AWST",         STD_ZONE,       480,    /* W. Australia */
 119         "B",            STD_ZONE,       120,    /* UTC+2h */
 120         "BST",          DST_ZONE,       60,     /* Great Britain */
 121         "C",            STD_ZONE,       180,    /* UTC+3h */
 122         "CDT",          DST_ZONE,       -300,
 123         "CST",          STD_ZONE,       -360,
 124         "D",            STD_ZONE,       240,    /* UTC+4h */
 125         "DEC",          MONTH_NAME,     12,
 126         "DECEMBER",     MONTH_NAME,     12,
 127         "DST",          DST_SUFFIX,     0,
 128         "E",            STD_ZONE,       300,    /* UTC+5h */
 129         "EDT",          DST_ZONE,       -240,
 130         "EET",          STD_ZONE,       120,    /* Eastern Europe */
 131         "EETDST",       DST_ZONE,       180,    /* Eastern Europe */
 132         "EST",          STD_ZONE,       -300,
 133         "F",            STD_ZONE,       360,    /* UTC+6h */
 134         "FEB",          MONTH_NAME,     2,
 135         "FEBRUARY",     MONTH_NAME,     2,
 136         "FRI",          DAY_NAME,       5,
 137         "FRIDAY",       DAY_NAME,       5,
 138         "G",            STD_ZONE,       420,    /* UTC+7h */
 139         "GMT",          STD_ZONE,       0,
 140         "H",            STD_ZONE,       480,    /* UTC+8h */
 141         "HDT",          DST_ZONE,       -540,   /* Hawaii/Alaska */
 142         "HST",          STD_ZONE,       -600,   /* Hawaii/Alaska */
 143         "I",            STD_ZONE,       540,    /* UTC+9h */
 144         "IST",          STD_ZONE,       120,    /* Israel */
 145         "JAN",          MONTH_NAME,     1,
 146         "JANUARY",      MONTH_NAME,     1,
 147         "JUL",          MONTH_NAME,     7,
 148         "JULY",         MONTH_NAME,     7,
 149         "JUN",          MONTH_NAME,     6,
 150         "JUNE",         MONTH_NAME,     6,
 151         "K",            STD_ZONE,       600,    /* UTC+10h */
 152         "L",            STD_ZONE,       660,    /* UTC+11h */
 153         "M",            STD_ZONE,       720,    /* UTC+12h */
 154         "MAR",          MONTH_NAME,     3,
 155         "MARCH",        MONTH_NAME,     3,
 156         "MAY",          MONTH_NAME,     5,
 157         "MDT",          DST_ZONE,       -360,
 158         "MEI",          MONTH_NAME,     5,
 159         "MET",          STD_ZONE,       60,     /* Central Europe */
 160         "METDST",       DST_ZONE,       120,    /* Central Europe */
 161         "MON",          DAY_NAME,       1,
 162         "MONDAY",       DAY_NAME,       1,
 163         "MRT",          MONTH_NAME,     3,
 164         "MST",          STD_ZONE,       -420,
 165         "N",            STD_ZONE,       -60,    /* UTC-1h */
 166         "NDT",          DST_ZONE,       -150,   /* Nfld. (Canada) */
 167         "NOV",          MONTH_NAME,     11,
 168         "NOVEMBER",     MONTH_NAME,     11,
 169         "NST",          STD_ZONE,       -210,   /* Nfld. (Canada) */
 170         "O",            STD_ZONE,       -120,   /* UTC-2h */
 171         "OCT",          MONTH_NAME,     10,
 172         "OCTOBER",      MONTH_NAME,     10,
 173         "OKT",          MONTH_NAME,     10,
 174         "ON",           0,              0,      /* "on" (throwaway) */
 175         "P",            STD_ZONE,       -180,   /* UTC-3h */
 176         "PDT",          DST_ZONE,       -420,
 177         "PM",           AMPM,           12,
 178         "PST",          STD_ZONE,       -480,
 179         "Q",            STD_ZONE,       -240,   /* UTC-4h */
 180         "R",            STD_ZONE,       -300,   /* UTC-5h */
 181         "S",            STD_ZONE,       -360,   /* UTC-6h */
 182         "SAT",          DAY_NAME,       6,
 183         "SATURDAY",     DAY_NAME,       6,
 184         "SEP",          MONTH_NAME,     9,
 185         "SEPT",         MONTH_NAME,     9,
 186         "SEPTEMBER",    MONTH_NAME,     9,
 187         "SUN",          DAY_NAME,       0,
 188         "SUNDAY",       DAY_NAME,       0,
 189         "T",            STD_ZONE,       -420,   /* UTC-7h */
 190         "THU",          DAY_NAME,       4,
 191         "THUR",         DAY_NAME,       4,
 192         "THURS",        DAY_NAME,       4,
 193         "THURSDAY",     DAY_NAME,       4,
 194         "TUE",          DAY_NAME,       2,
 195         "TUES",         DAY_NAME,       2,
 196         "TUESDAY",      DAY_NAME,       2,
 197         "U",            STD_ZONE,       -480,   /* UTC-8h */
 198         "UT",           STD_ZONE,       0,
 199         "UTC",          STD_ZONE,       0,
 200         "V",            STD_ZONE,       -540,   /* UTC-9h */
 201         "W",            STD_ZONE,       -600,   /* UTC-10h */
 202         "WED",          DAY_NAME,       3,
 203         "WEDNESDAY",    DAY_NAME,       3,
 204         "WEDS",         DAY_NAME,       3,
 205         "WET",          STD_ZONE,       0,      /* Western Europe */
 206         "WETDST",       DST_ZONE,       60,     /* Western Europe */
 207         "X",            STD_ZONE,       -660,   /* UTC-11h */
 208         "Y",            STD_ZONE,       -720,   /* UTC-12h */
 209         "YDT",          DST_ZONE,       -480,   /* Yukon */
 210         "YST",          STD_ZONE,       -540,   /* Yukon */
 211         "Z",            STD_ZONE,       0,      /* UTC */
 212         "\177",         -1,             0,
 213         "\177",         -1,             0,
 214         "\177",         -1,             0,
 215         "\177",         -1,             0,
 216         "\177",         -1,             0,
 217         "\177",         -1,             0,
 218         "\177",         -1,             0,
 219         "\177",         -1,             0,
 220     };
 221 static struct wordtable *find_word();
 222
 223 /* int yylex ()
 224  *     Return the next token for the YACC parser.
 225  */
 226 int
 227 yylex ()
 228 {   static char buffer[MAX_WORD_LENGTH+1];
 229     register char *c, *d;
 230     register struct wordtable *wt;
 231     register int num, ndgts;
 232
 233   restart:
 234     /* We will return here if an invalid input token is detected. */
 235     c = buffer; d = yyinbuf;
 236
 237     /* Skip over blanks, tabs, commas, and parentheses. */
 238     do
 239     {
 240        *c = *d++;
 241     }
 242     while (*c != '\0' && (*c == ' ' || *c == '\t' || *c == ','
 243                        || *c == '(' || *c == ')'));
 244
 245     /* A zero (null) byte signals the end of the input. */
 246     if (*c == 0)
 247     {   yyinbuf = --d;          /* stay put on the null */
 248         return 0;
 249     }
 250
 251     /* Process a word (looking it up in "wordtable"). */
 252     if ((*c >= 'A' && *c <= 'Z') || (*c >= 'a' && *c <= 'z'))
 253     {   if (*c >= 'a' && *c <= 'z') *c += 'A' - 'a';
 254         while (c < buffer + MAX_WORD_LENGTH
 255                && ((*d >= 'A' && *d <= 'Z')
 256                    || (*d >= 'a' && *d <= 'z')))
 257         {   *++c = *d++;
 258             if (*c >= 'a' && *c <= 'z') *c += 'A' - 'a';
 259         }
 260         if ((*d >= 'A' && *d <= 'Z') || (*d >= 'a' && *d <= 'z'))
 261         {   /* Word is too long (over MAX_WORD_LENGTH characters). */
 262             do { d++; } while ((*d >= 'A' && *d <= 'Z')
 263                                || (*d >= 'a' && *d <= 'z'));
 264             yyinbuf = d;
 265             goto error;
 266         }
 267         *++c = 0; yyinbuf = d;
 268         if ((wt = find_word (buffer)) == NULL) goto error;
 269         if (wt->token == 0) goto restart;       /* ignore this word */
 270         yylval.IntVal = wt->lexval;
 271         return wt->token;
 272     }
 273
 274     /* Process a number. */
 275     if (*c >= '0' && *c <= '9')
 276     {   num = *c - '0'; ndgts = 1;
 277         for (ndgts = 1; ndgts < 8 && *d >= '0' && *d <= '9'; ndgts++)  /* ajs */
 278             num = 10*num + (*d++ - '0');
 279         if (*d >= '0' && *d <= '9')
 280         {   /* Number is too long (over 8 digits). */           /* ajs */
 281             do { d++; } while (*d >= '0' && *d <= '9');
 282             yyinbuf = d;
 283             goto error;
 284         }
 285         yyinbuf = d;
 286         yylval.IntVal = num;
 287         switch (ndgts)
 288         {   case 1:  return NUM9;
 289             case 2:  if (num <= 23) return NUM23;
 290                      if (num <= 59) return NUM59;
 291                      /*otherwise*/  return NUM99;
 292             case 3:
 293             case 4:  if (num/100 <= 23 && num%100 <= 59) return NUM2359;
 294                      /*otherwise*/                       return NUM9999;
 295             case 5:
 296             case 6:  if (num/10000 <= 23
 297                          && (num%10000)/100 <= 59
 298                          && num%100 <= 59)
 299                          return NUM235959;
 300                      if ((((num % 10000) / 100) <= 12)  /* ajs */
 301                       &&  ((num % 100) <= 31))          /* ajs */
 302                          return NUM991231;              /* ajs */
 303                      /*otherwise*/   return NUM999999;
 304                      goto error;
 305             case 8:  if ((((num % 10000) / 100) <= 12)  /* ajs */
 306                       &&  ((num % 100) <= 31))          /* ajs */
 307                          return NUM99991231;            /* ajs */
 308                      goto error;                        /* ajs */
 309             default: goto error;
 310     }   }
 311
 312     /* Pass back the following delimiter tokens verbatim.. */
 313     if (*c == '-' || *c == '+' || *c == '/' || *c == ':' || *c == '.')
 314     {   yyinbuf = d;
 315         return *c;
 316     }
 317
 318   error:
 319     /* An unidentified character was found in the input. */
 320     yyinbuf = d;
 321     if (yyans.error == NULL) yyans.error = yyinbuf;
 322     goto restart;
 323 }
 324
 325 /* struct wordtable *find_word (word) char *word;
 326  *     Look up a word in the "wordtable" array via a binary search.
 327  */
 328 static
 329 struct wordtable *
 330 find_word (word)
 331     register char *word;
 332 {   register int low, mid, high;
 333     register int comparison;
 334
 335     low = -1;
 336     high = WORDTABLE_SIZE;
 337     while (low+1 < high)
 338     {   mid = (low + high) / 2;
 339         comparison = strcmp (wordtable[mid].text, word);
 340         if (comparison == 0) return wordtable+mid;
 341         if (comparison > 0)  high = mid;
 342         else                 low = mid;
 343     }
 344     return NULL;
 345 }