src/datelex.c

   1 /*$Log: datelex.c,v $
   2 /*Revision 1.2  2002-09-28 06:58:45  arjen
   3 /*Bugfix: conversion of an empty string to a date or hour object
   4 /*now makes the values of such an object 0 (null) instead of giving
   5 /*a segmentation fault.
   6 /*The class UTC combines the date and hour classes. The most basic
   7 /*functions of the UTC class are now implemented.
   8 /*These include constructors and conversion to and from String objects.
   9 /*New functions: date::proper(), hour::proper() and UTC::proper().
  10 /*Return true if the object holds a proper clock time and/or calendar
  11 /*date; false if at least one value is out of range.
  12 /*
  13  *Revision 1.1  2002/07/25 08:01:26  arjen
  14  *First checkin, AXE release 0.2
  15  *
  16  * Revision 1.1  84/09/01  15:01:14  wales
  17  * Initial revision
  18  *
  19  * Copyright (c) 1984 by Richard B. Wales
  20  *
  21  * Purpose:
  22  *
  23  *     Lexical analyzer for "parsedate" routine.  This lexer was orig-
  24  *     inally written in LEX, but rewriting it as an ad-hoc routine
  25  *     resulted in an enormous savings in space and a significant
  26  *     increase in speed.
  27  *
  28  * Usage:
  29  *
  30  *     Called as needed by the YACC parser ("dateyacc.c").  Not intended
  31  *     to be called from any other routine.
  32  *
  33  * Notes:
  34  *
  35  * Global contents:
  36  *
  37  *     int yylex ()
  38  *         Returns the token number (from the YACC grammar) of the next
  39  *         token in the input string pointed to by the global variable
  40  *         "yyinbuf".  The global variable "yylval" is set to the lexi-
  41  *         cal value (if any) of the token.  "yyinbuf" is set to point
  42  *         to the first character in the input string which is not a
  43  *         part of the token just recognized.
  44  *
  45  * Local contents:
  46  *
  47  *     struct wordtable *find_word (word) char *word;
  48  *         Returns a pointer to the entry in the "wordtable" array cor-
  49  *         responding to the string "word".  If "word" is not found, the
  50  *         returned value is NULL.
  51  */
  52
  53 /* ajs
  54  * ajs  Code added 850314 to allow NUM991231 and NUM99991231.
  55  * ajs  All added/changed lines contain "ajs" for easy searching.
  56  * ajs  */
  57
  58 /* AJB, Aug 28 1999:  Added month names in Dutch  */
  59
/* RCS identification string; compiled in only when RCSIDENT is defined. */
#ifdef RCSIDENT
static char rcsident[] = "$Header: /cvsroot/lib/AXE/src/datelex.c,v 1.2 2002-09-28 06:58:45 arjen Exp $";
#endif /* RCSIDENT */
  63
#include <stdio.h>
#include <string.h>
#include "dateyacc.h"
#include "parsedate.h"
  67
/* Pointer to the input string.  "yylex" begins each scan at this
 * position and, before returning, leaves it pointing just past the
 * text it consumed (or on the terminating null at end of input).
 */
char *yyinbuf;

/* "answer" structure.  When "yylex" rejects a piece of input and the
 * "error" member is still NULL, it stores there the input position
 * reached just after the rejected text.
 */
struct parseddate yyans;
  73
  74 /* Binary-search word table.
  75  * Entries must be sorted in ascending order on "text" value, and the
  76  * total number of entries must be one less than a power of 2.  "Filler"
  77  * entries (with "token" values of -1) are inserted at the beginning and
  78  * end of the table to pad it as necessary.
  79  */
  80 #define WORDTABLE_SIZE 127      /* MUST be one less than power of 2 */
  81 #define MAX_WORD_LENGTH 20      /* used to weed out overly long words
  82                                  * in "yylex".  Must be at least as long
  83                                  * as the longest word in "wordtable",
  84                                  * but may be longer.
  85                                  */
  86 struct wordtable
  87     {   char *text;
  88         int   token;
  89         int   lexval;
  90     } wordtable[WORDTABLE_SIZE] =
  91     {/* text            token           lexval */
  92         "",             -1,             0,
  93         "",             -1,             0,
  94         "",             -1,             0,
  95         "",             -1,             0,
  96         "",             -1,             0,
  97         "",             -1,             0,
  98         "",             -1,             0,
  99         "",             -1,             0,
 100         "",             -1,             0,
 101         "",             -1,             0,
 102         "",             -1,             0,
 103         "A",            STD_ZONE,       60,     /* UTC+1h */
 104         "ACSST",        DST_ZONE,       630,    /* Cent. Australia */
 105         "ACST",         STD_ZONE,       570,    /* Cent. Australia */
 106         "ADT",          DST_ZONE,       -180,   /* Atlantic (Canada) */
 107         "AESST",        DST_ZONE,       660,    /* E. Australia */
 108         "AEST",         STD_ZONE,       600,    /* E. Australia */
 109         "AM",           AMPM,           0,
 110         "APR",          MONTH_NAME,     4,
 111         "APRIL",        MONTH_NAME,     4,
 112         "AST",          STD_ZONE,       -240,   /* Atlantic (Canada) */
 113         "AT",           0,              0,      /* "at" (throwaway) */
 114         "AUG",          MONTH_NAME,     8,
 115         "AUGUST",       MONTH_NAME,     8,
 116         "AWSST",        DST_ZONE,       540,    /* W. Australia */
 117         "AWST",         STD_ZONE,       480,    /* W. Australia */
 118         "B",            STD_ZONE,       120,    /* UTC+2h */
 119         "BST",          DST_ZONE,       60,     /* Great Britain */
 120         "C",            STD_ZONE,       180,    /* UTC+3h */
 121         "CDT",          DST_ZONE,       -300,
 122         "CST",          STD_ZONE,       -360,
 123         "D",            STD_ZONE,       240,    /* UTC+4h */
 124         "DEC",          MONTH_NAME,     12,
 125         "DECEMBER",     MONTH_NAME,     12,
 126         "DST",          DST_SUFFIX,     0,
 127         "E",            STD_ZONE,       300,    /* UTC+5h */
 128         "EDT",          DST_ZONE,       -240,
 129         "EET",          STD_ZONE,       120,    /* Eastern Europe */
 130         "EETDST",       DST_ZONE,       180,    /* Eastern Europe */
 131         "EST",          STD_ZONE,       -300,
 132         "F",            STD_ZONE,       360,    /* UTC+6h */
 133         "FEB",          MONTH_NAME,     2,
 134         "FEBRUARY",     MONTH_NAME,     2,
 135         "FRI",          DAY_NAME,       5,
 136         "FRIDAY",       DAY_NAME,       5,
 137         "G",            STD_ZONE,       420,    /* UTC+7h */
 138         "GMT",          STD_ZONE,       0,
 139         "H",            STD_ZONE,       480,    /* UTC+8h */
 140         "HDT",          DST_ZONE,       -540,   /* Hawaii/Alaska */
 141         "HST",          STD_ZONE,       -600,   /* Hawaii/Alaska */
 142         "I",            STD_ZONE,       540,    /* UTC+9h */
 143         "IST",          STD_ZONE,       120,    /* Israel */
 144         "JAN",          MONTH_NAME,     1,
 145         "JANUARY",      MONTH_NAME,     1,
 146         "JUL",          MONTH_NAME,     7,
 147         "JULY",         MONTH_NAME,     7,
 148         "JUN",          MONTH_NAME,     6,
 149         "JUNE",         MONTH_NAME,     6,
 150         "K",            STD_ZONE,       600,    /* UTC+10h */
 151         "L",            STD_ZONE,       660,    /* UTC+11h */
 152         "M",            STD_ZONE,       720,    /* UTC+12h */
 153         "MAR",          MONTH_NAME,     3,
 154         "MARCH",        MONTH_NAME,     3,
 155         "MAY",          MONTH_NAME,     5,
 156         "MDT",          DST_ZONE,       -360,
 157         "MEI",          MONTH_NAME,     5,
 158         "MET",          STD_ZONE,       60,     /* Central Europe */
 159         "METDST",       DST_ZONE,       120,    /* Central Europe */
 160         "MON",          DAY_NAME,       1,
 161         "MONDAY",       DAY_NAME,       1,
 162         "MRT",          MONTH_NAME,     3,
 163         "MST",          STD_ZONE,       -420,
 164         "N",            STD_ZONE,       -60,    /* UTC-1h */
 165         "NDT",          DST_ZONE,       -150,   /* Nfld. (Canada) */
 166         "NOV",          MONTH_NAME,     11,
 167         "NOVEMBER",     MONTH_NAME,     11,
 168         "NST",          STD_ZONE,       -210,   /* Nfld. (Canada) */
 169         "O",            STD_ZONE,       -120,   /* UTC-2h */
 170         "OCT",          MONTH_NAME,     10,
 171         "OCTOBER",      MONTH_NAME,     10,
 172         "OKT",          MONTH_NAME,     10,
 173         "ON",           0,              0,      /* "on" (throwaway) */
 174         "P",            STD_ZONE,       -180,   /* UTC-3h */
 175         "PDT",          DST_ZONE,       -420,
 176         "PM",           AMPM,           12,
 177         "PST",          STD_ZONE,       -480,
 178         "Q",            STD_ZONE,       -240,   /* UTC-4h */
 179         "R",            STD_ZONE,       -300,   /* UTC-5h */
 180         "S",            STD_ZONE,       -360,   /* UTC-6h */
 181         "SAT",          DAY_NAME,       6,
 182         "SATURDAY",     DAY_NAME,       6,
 183         "SEP",          MONTH_NAME,     9,
 184         "SEPT",         MONTH_NAME,     9,
 185         "SEPTEMBER",    MONTH_NAME,     9,
 186         "SUN",          DAY_NAME,       0,
 187         "SUNDAY",       DAY_NAME,       0,
 188         "T",            STD_ZONE,       -420,   /* UTC-7h */
 189         "THU",          DAY_NAME,       4,
 190         "THUR",         DAY_NAME,       4,
 191         "THURS",        DAY_NAME,       4,
 192         "THURSDAY",     DAY_NAME,       4,
 193         "TUE",          DAY_NAME,       2,
 194         "TUES",         DAY_NAME,       2,
 195         "TUESDAY",      DAY_NAME,       2,
 196         "U",            STD_ZONE,       -480,   /* UTC-8h */
 197         "UT",           STD_ZONE,       0,
 198         "UTC",          STD_ZONE,       0,
 199         "V",            STD_ZONE,       -540,   /* UTC-9h */
 200         "W",            STD_ZONE,       -600,   /* UTC-10h */
 201         "WED",          DAY_NAME,       3,
 202         "WEDNESDAY",    DAY_NAME,       3,
 203         "WEDS",         DAY_NAME,       3,
 204         "WET",          STD_ZONE,       0,      /* Western Europe */
 205         "WETDST",       DST_ZONE,       60,     /* Western Europe */
 206         "X",            STD_ZONE,       -660,   /* UTC-11h */
 207         "Y",            STD_ZONE,       -720,   /* UTC-12h */
 208         "YDT",          DST_ZONE,       -480,   /* Yukon */
 209         "YST",          STD_ZONE,       -540,   /* Yukon */
 210         "Z",            STD_ZONE,       0,      /* UTC */
 211         "\177",         -1,             0,
 212         "\177",         -1,             0,
 213         "\177",         -1,             0,
 214         "\177",         -1,             0,
 215         "\177",         -1,             0,
 216         "\177",         -1,             0,
 217         "\177",         -1,             0,
 218         "\177",         -1,             0,
 219     };
 220 static struct wordtable *find_word();
 221
 222 /* int yylex ()
 223  *     Return the next token for the YACC parser.
 224  */
 225 int
 226 yylex ()
 227 {   static char buffer[MAX_WORD_LENGTH+1];
 228     register char *c, *d;
 229     register struct wordtable *wt;
 230     register int num, ndgts;
 231
 232   restart:
 233     /* We will return here if an invalid input token is detected. */
 234     c = buffer; d = yyinbuf;
 235
 236     /* Skip over blanks, tabs, commas, and parentheses. */
 237     do
 238     {
 239        *c = *d++;
 240     }
 241     while (*c != '\0' && (*c == ' ' || *c == '\t' || *c == ','
 242                        || *c == '(' || *c == ')'));
 243
 244     /* A zero (null) byte signals the end of the input. */
 245     if (*c == 0)
 246     {   yyinbuf = --d;          /* stay put on the null */
 247         return 0;
 248     }
 249
 250     /* Process a word (looking it up in "wordtable"). */
 251     if ((*c >= 'A' && *c <= 'Z') || (*c >= 'a' && *c <= 'z'))
 252     {   if (*c >= 'a' && *c <= 'z') *c += 'A' - 'a';
 253         while (c < buffer + MAX_WORD_LENGTH
 254                && ((*d >= 'A' && *d <= 'Z')
 255                    || (*d >= 'a' && *d <= 'z')))
 256         {   *++c = *d++;
 257             if (*c >= 'a' && *c <= 'z') *c += 'A' - 'a';
 258         }
 259         if ((*d >= 'A' && *d <= 'Z') || (*d >= 'a' && *d <= 'z'))
 260         {   /* Word is too long (over MAX_WORD_LENGTH characters). */
 261             do { d++; } while ((*d >= 'A' && *d <= 'Z')
 262                                || (*d >= 'a' && *d <= 'z'));
 263             yyinbuf = d;
 264             goto error;
 265         }
 266         *++c = 0; yyinbuf = d;
 267         if ((wt = find_word (buffer)) == NULL) goto error;
 268         if (wt->token == 0) goto restart;       /* ignore this word */
 269         yylval.IntVal = wt->lexval;
 270         return wt->token;
 271     }
 272
 273     /* Process a number. */
 274     if (*c >= '0' && *c <= '9')
 275     {   num = *c - '0'; ndgts = 1;
 276         for (ndgts = 1; ndgts < 8 && *d >= '0' && *d <= '9'; ndgts++)  /* ajs */
 277             num = 10*num + (*d++ - '0');
 278         if (*d >= '0' && *d <= '9')
 279         {   /* Number is too long (over 8 digits). */           /* ajs */
 280             do { d++; } while (*d >= '0' && *d <= '9');
 281             yyinbuf = d;
 282             goto error;
 283         }
 284         yyinbuf = d;
 285         yylval.IntVal = num;
 286         switch (ndgts)
 287         {   case 1:  return NUM9;
 288             case 2:  if (num <= 23) return NUM23;
 289                      if (num <= 59) return NUM59;
 290                      /*otherwise*/  return NUM99;
 291             case 3:
 292             case 4:  if (num/100 <= 23 && num%100 <= 59) return NUM2359;
 293                      /*otherwise*/                       return NUM9999;
 294             case 5:
 295             case 6:  if (num/10000 <= 23
 296                          && (num%10000)/100 <= 59
 297                          && num%100 <= 59)
 298                          return NUM235959;
 299                      if ((((num % 10000) / 100) <= 12)  /* ajs */
 300                       &&  ((num % 100) <= 31))          /* ajs */
 301                          return NUM991231;              /* ajs */
 302                      goto error;
 303             case 8:  if ((((num % 10000) / 100) <= 12)  /* ajs */
 304                       &&  ((num % 100) <= 31))          /* ajs */
 305                          return NUM99991231;            /* ajs */
 306                      goto error;                        /* ajs */
 307             default: goto error;
 308     }   }
 309
 310     /* Pass back the following delimiter tokens verbatim.. */
 311     if (*c == '-' || *c == '+' || *c == '/' || *c == ':' || *c == '.')
 312     {   yyinbuf = d;
 313         return *c;
 314     }
 315
 316   error:
 317     /* An unidentified character was found in the input. */
 318     yyinbuf = d;
 319     if (yyans.error == NULL) yyans.error = yyinbuf;
 320     goto restart;
 321 }
 322
 323 /* struct wordtable *find_word (word) char *word;
 324  *     Look up a word in the "wordtable" array via a binary search.
 325  */
 326 static
 327 struct wordtable *
 328 find_word (word)
 329     register char *word;
 330 {   register int low, mid, high;
 331     register int comparison;
 332
 333     low = -1;
 334     high = WORDTABLE_SIZE;
 335     while (low+1 < high)
 336     {   mid = (low + high) / 2;
 337         comparison = strcmp (wordtable[mid].text, word);
 338         if (comparison == 0) return wordtable+mid;
 339         if (comparison > 0)  high = mid;
 340         else                 low = mid;
 341     }
 342     return NULL;
 343 }