src/datelex.c

   1 /*$Log: datelex.c,v $
   2 /*Revision 1.1  2002-07-25 08:01:26  arjen
   3 /*First checkin, AXE release 0.2
   4 /*
   5  * Revision 1.1  84/09/01  15:01:14  wales
   6  * Initial revision
   7  *
   8  * Copyright (c) 1984 by Richard B. Wales
   9  *
  10  * Purpose:
  11  *
  12  *     Lexical analyzer for "parsedate" routine.  This lexer was orig-
  13  *     inally written in LEX, but rewriting it as an ad-hoc routine
  14  *     resulted in an enormous savings in space and a significant
  15  *     increase in speed.
  16  *
  17  * Usage:
  18  *
  19  *     Called as needed by the YACC parser ("dateyacc.c").  Not intended
  20  *     to be called from any other routine.
  21  *
  22  * Notes:
  23  *
  24  * Global contents:
  25  *
  26  *     int yylex ()
  27  *         Returns the token number (from the YACC grammar) of the next
  28  *         token in the input string pointed to by the global variable
  29  *         "yyinbuf".  The global variable "yylval" is set to the lexi-
  30  *         cal value (if any) of the token.  "yyinbuf" is set to point
  31  *         to the first character in the input string which is not a
  32  *         part of the token just recognized.
  33  *
  34  * Local contents:
  35  *
  36  *     struct wordtable *find_word (word) char *word;
  37  *         Returns a pointer to the entry in the "wordtable" array cor-
  38  *         responding to the string "word".  If "word" is not found, the
  39  *         returned value is NULL.
  40  */
  41
  42 /* ajs
  43  * ajs  Code added 850314 to allow NUM991231 and NUM99991231.
  44  * ajs  All added/changed lines contain "ajs" for easy searching.
  45  * ajs  */
  46
  47 /* AJB, Aug 28 1999:  Added month names in Dutch  */
  48
  49 #ifdef RCSIDENT
  50 static char rcsident[] = "$Header: /cvsroot/lib/AXE/src/datelex.c,v 1.1 2002-07-25 08:01:26 arjen Exp $";
  51 #endif /* RCSIDENT */
  52
#include <stdio.h>
#include <string.h>
#include "dateyacc.h"
#include "parsedate.h"
  56
/* Pointer to the input string.  "yylex" begins scanning at this position
 * and, before it returns, moves the pointer past whatever it consumed
 * (at end of input it is left pointing at the terminating null byte).
 */
char *yyinbuf;

/* "answer" structure.  The only member touched in this file is "error":
 * when "yylex" meets a token it cannot identify and "error" is still
 * NULL, it stores the input position just past that token there, so the
 * first such position is the one that is kept.
 */
struct parseddate yyans;
  62
  63 /* Binary-search word table.
  64  * Entries must be sorted in ascending order on "text" value, and the
  65  * total number of entries must be one less than a power of 2.  "Filler"
  66  * entries (with "token" values of -1) are inserted at the beginning and
  67  * end of the table to pad it as necessary.
  68  */
  69 #define WORDTABLE_SIZE 127      /* MUST be one less than power of 2 */
  70 #define MAX_WORD_LENGTH 20      /* used to weed out overly long words
  71                                  * in "yylex".  Must be at least as long
  72                                  * as the longest word in "wordtable",
  73                                  * but may be longer.
  74                                  */
  75 struct wordtable
  76     {   char *text;
  77         int   token;
  78         int   lexval;
  79     } wordtable[WORDTABLE_SIZE] =
  80     {/* text            token           lexval */
  81         "",             -1,             0,
  82         "",             -1,             0,
  83         "",             -1,             0,
  84         "",             -1,             0,
  85         "",             -1,             0,
  86         "",             -1,             0,
  87         "",             -1,             0,
  88         "",             -1,             0,
  89         "",             -1,             0,
  90         "",             -1,             0,
  91         "",             -1,             0,
  92         "A",            STD_ZONE,       60,     /* UTC+1h */
  93         "ACSST",        DST_ZONE,       630,    /* Cent. Australia */
  94         "ACST",         STD_ZONE,       570,    /* Cent. Australia */
  95         "ADT",          DST_ZONE,       -180,   /* Atlantic (Canada) */
  96         "AESST",        DST_ZONE,       660,    /* E. Australia */
  97         "AEST",         STD_ZONE,       600,    /* E. Australia */
  98         "AM",           AMPM,           0,
  99         "APR",          MONTH_NAME,     4,
 100         "APRIL",        MONTH_NAME,     4,
 101         "AST",          STD_ZONE,       -240,   /* Atlantic (Canada) */
 102         "AT",           0,              0,      /* "at" (throwaway) */
 103         "AUG",          MONTH_NAME,     8,
 104         "AUGUST",       MONTH_NAME,     8,
 105         "AWSST",        DST_ZONE,       540,    /* W. Australia */
 106         "AWST",         STD_ZONE,       480,    /* W. Australia */
 107         "B",            STD_ZONE,       120,    /* UTC+2h */
 108         "BST",          DST_ZONE,       60,     /* Great Britain */
 109         "C",            STD_ZONE,       180,    /* UTC+3h */
 110         "CDT",          DST_ZONE,       -300,
 111         "CST",          STD_ZONE,       -360,
 112         "D",            STD_ZONE,       240,    /* UTC+4h */
 113         "DEC",          MONTH_NAME,     12,
 114         "DECEMBER",     MONTH_NAME,     12,
 115         "DST",          DST_SUFFIX,     0,
 116         "E",            STD_ZONE,       300,    /* UTC+5h */
 117         "EDT",          DST_ZONE,       -240,
 118         "EET",          STD_ZONE,       120,    /* Eastern Europe */
 119         "EETDST",       DST_ZONE,       180,    /* Eastern Europe */
 120         "EST",          STD_ZONE,       -300,
 121         "F",            STD_ZONE,       360,    /* UTC+6h */
 122         "FEB",          MONTH_NAME,     2,
 123         "FEBRUARY",     MONTH_NAME,     2,
 124         "FRI",          DAY_NAME,       5,
 125         "FRIDAY",       DAY_NAME,       5,
 126         "G",            STD_ZONE,       420,    /* UTC+7h */
 127         "GMT",          STD_ZONE,       0,
 128         "H",            STD_ZONE,       480,    /* UTC+8h */
 129         "HDT",          DST_ZONE,       -540,   /* Hawaii/Alaska */
 130         "HST",          STD_ZONE,       -600,   /* Hawaii/Alaska */
 131         "I",            STD_ZONE,       540,    /* UTC+9h */
 132         "IST",          STD_ZONE,       120,    /* Israel */
 133         "JAN",          MONTH_NAME,     1,
 134         "JANUARY",      MONTH_NAME,     1,
 135         "JUL",          MONTH_NAME,     7,
 136         "JULY",         MONTH_NAME,     7,
 137         "JUN",          MONTH_NAME,     6,
 138         "JUNE",         MONTH_NAME,     6,
 139         "K",            STD_ZONE,       600,    /* UTC+10h */
 140         "L",            STD_ZONE,       660,    /* UTC+11h */
 141         "M",            STD_ZONE,       720,    /* UTC+12h */
 142         "MAR",          MONTH_NAME,     3,
 143         "MARCH",        MONTH_NAME,     3,
 144         "MAY",          MONTH_NAME,     5,
 145         "MDT",          DST_ZONE,       -360,
 146         "MEI",          MONTH_NAME,     5,
 147         "MET",          STD_ZONE,       60,     /* Central Europe */
 148         "METDST",       DST_ZONE,       120,    /* Central Europe */
 149         "MON",          DAY_NAME,       1,
 150         "MONDAY",       DAY_NAME,       1,
 151         "MRT",          MONTH_NAME,     3,
 152         "MST",          STD_ZONE,       -420,
 153         "N",            STD_ZONE,       -60,    /* UTC-1h */
 154         "NDT",          DST_ZONE,       -150,   /* Nfld. (Canada) */
 155         "NOV",          MONTH_NAME,     11,
 156         "NOVEMBER",     MONTH_NAME,     11,
 157         "NST",          STD_ZONE,       -210,   /* Nfld. (Canada) */
 158         "O",            STD_ZONE,       -120,   /* UTC-2h */
 159         "OCT",          MONTH_NAME,     10,
 160         "OCTOBER",      MONTH_NAME,     10,
 161         "OKT",          MONTH_NAME,     10,
 162         "ON",           0,              0,      /* "on" (throwaway) */
 163         "P",            STD_ZONE,       -180,   /* UTC-3h */
 164         "PDT",          DST_ZONE,       -420,
 165         "PM",           AMPM,           12,
 166         "PST",          STD_ZONE,       -480,
 167         "Q",            STD_ZONE,       -240,   /* UTC-4h */
 168         "R",            STD_ZONE,       -300,   /* UTC-5h */
 169         "S",            STD_ZONE,       -360,   /* UTC-6h */
 170         "SAT",          DAY_NAME,       6,
 171         "SATURDAY",     DAY_NAME,       6,
 172         "SEP",          MONTH_NAME,     9,
 173         "SEPT",         MONTH_NAME,     9,
 174         "SEPTEMBER",    MONTH_NAME,     9,
 175         "SUN",          DAY_NAME,       0,
 176         "SUNDAY",       DAY_NAME,       0,
 177         "T",            STD_ZONE,       -420,   /* UTC-7h */
 178         "THU",          DAY_NAME,       4,
 179         "THUR",         DAY_NAME,       4,
 180         "THURS",        DAY_NAME,       4,
 181         "THURSDAY",     DAY_NAME,       4,
 182         "TUE",          DAY_NAME,       2,
 183         "TUES",         DAY_NAME,       2,
 184         "TUESDAY",      DAY_NAME,       2,
 185         "U",            STD_ZONE,       -480,   /* UTC-8h */
 186         "UT",           STD_ZONE,       0,
 187         "UTC",          STD_ZONE,       0,
 188         "V",            STD_ZONE,       -540,   /* UTC-9h */
 189         "W",            STD_ZONE,       -600,   /* UTC-10h */
 190         "WED",          DAY_NAME,       3,
 191         "WEDNESDAY",    DAY_NAME,       3,
 192         "WEDS",         DAY_NAME,       3,
 193         "WET",          STD_ZONE,       0,      /* Western Europe */
 194         "WETDST",       DST_ZONE,       60,     /* Western Europe */
 195         "X",            STD_ZONE,       -660,   /* UTC-11h */
 196         "Y",            STD_ZONE,       -720,   /* UTC-12h */
 197         "YDT",          DST_ZONE,       -480,   /* Yukon */
 198         "YST",          STD_ZONE,       -540,   /* Yukon */
 199         "Z",            STD_ZONE,       0,      /* UTC */
 200         "\177",         -1,             0,
 201         "\177",         -1,             0,
 202         "\177",         -1,             0,
 203         "\177",         -1,             0,
 204         "\177",         -1,             0,
 205         "\177",         -1,             0,
 206         "\177",         -1,             0,
 207         "\177",         -1,             0,
 208     };
 209 static struct wordtable *find_word();
 210
 211 /* int yylex ()
 212  *     Return the next token for the YACC parser.
 213  */
 214 int
 215 yylex ()
 216 {   static char buffer[MAX_WORD_LENGTH+1];
 217     register char *c, *d;
 218     register struct wordtable *wt;
 219     register int num, ndgts;
 220
 221   restart:
 222     /* We will return here if an invalid input token is detected. */
 223     c = buffer; d = yyinbuf;
 224
 225     /* Skip over blanks, tabs, commas, and parentheses. */
 226     do { *c = *d++; }
 227         while (*c == ' ' || *c == '\t' || *c == ','
 228                || *c == '(' || *c == ')');
 229
 230     /* A zero (null) byte signals the end of the input. */
 231     if (*c == 0)
 232     {   yyinbuf = --d;          /* stay put on the null */
 233         return 0;
 234     }
 235
 236     /* Process a word (looking it up in "wordtable"). */
 237     if ((*c >= 'A' && *c <= 'Z') || (*c >= 'a' && *c <= 'z'))
 238     {   if (*c >= 'a' && *c <= 'z') *c += 'A' - 'a';
 239         while (c < buffer + MAX_WORD_LENGTH
 240                && ((*d >= 'A' && *d <= 'Z')
 241                    || (*d >= 'a' && *d <= 'z')))
 242         {   *++c = *d++;
 243             if (*c >= 'a' && *c <= 'z') *c += 'A' - 'a';
 244         }
 245         if ((*d >= 'A' && *d <= 'Z') || (*d >= 'a' && *d <= 'z'))
 246         {   /* Word is too long (over MAX_WORD_LENGTH characters). */
 247             do { d++; } while ((*d >= 'A' && *d <= 'Z')
 248                                || (*d >= 'a' && *d <= 'z'));
 249             yyinbuf = d;
 250             goto error;
 251         }
 252         *++c = 0; yyinbuf = d;
 253         if ((wt = find_word (buffer)) == NULL) goto error;
 254         if (wt->token == 0) goto restart;       /* ignore this word */
 255         yylval.IntVal = wt->lexval;
 256         return wt->token;
 257     }
 258
 259     /* Process a number. */
 260     if (*c >= '0' && *c <= '9')
 261     {   num = *c - '0'; ndgts = 1;
 262         for (ndgts = 1; ndgts < 8 && *d >= '0' && *d <= '9'; ndgts++)  /* ajs */
 263             num = 10*num + (*d++ - '0');
 264         if (*d >= '0' && *d <= '9')
 265         {   /* Number is too long (over 8 digits). */           /* ajs */
 266             do { d++; } while (*d >= '0' && *d <= '9');
 267             yyinbuf = d;
 268             goto error;
 269         }
 270         yyinbuf = d;
 271         yylval.IntVal = num;
 272         switch (ndgts)
 273         {   case 1:  return NUM9;
 274             case 2:  if (num <= 23) return NUM23;
 275                      if (num <= 59) return NUM59;
 276                      /*otherwise*/  return NUM99;
 277             case 3:
 278             case 4:  if (num/100 <= 23 && num%100 <= 59) return NUM2359;
 279                      /*otherwise*/                       return NUM9999;
 280             case 5:
 281             case 6:  if (num/10000 <= 23
 282                          && (num%10000)/100 <= 59
 283                          && num%100 <= 59)
 284                          return NUM235959;
 285                      if ((((num % 10000) / 100) <= 12)  /* ajs */
 286                       &&  ((num % 100) <= 31))          /* ajs */
 287                          return NUM991231;              /* ajs */
 288                      goto error;
 289             case 8:  if ((((num % 10000) / 100) <= 12)  /* ajs */
 290                       &&  ((num % 100) <= 31))          /* ajs */
 291                          return NUM99991231;            /* ajs */
 292                      goto error;                        /* ajs */
 293             default: goto error;
 294     }   }
 295
 296     /* Pass back the following delimiter tokens verbatim.. */
 297     if (*c == '-' || *c == '+' || *c == '/' || *c == ':' || *c == '.')
 298     {   yyinbuf = d;
 299         return *c;
 300     }
 301
 302   error:
 303     /* An unidentified character was found in the input. */
 304     yyinbuf = d;
 305     if (yyans.error == NULL) yyans.error = yyinbuf;
 306     goto restart;
 307 }
 308
 309 /* struct wordtable *find_word (word) char *word;
 310  *     Look up a word in the "wordtable" array via a binary search.
 311  */
 312 static
 313 struct wordtable *
 314 find_word (word)
 315     register char *word;
 316 {   register int low, mid, high;
 317     register int comparison;
 318
 319     low = -1;
 320     high = WORDTABLE_SIZE;
 321     while (low+1 < high)
 322     {   mid = (low + high) / 2;
 323         comparison = strcmp (wordtable[mid].text, word);
 324         if (comparison == 0) return wordtable+mid;
 325         if (comparison > 0)  high = mid;
 326         else                 low = mid;
 327     }
 328     return NULL;
 329 }