2 *Revision 1.2 2002-09-28 06:58:45 arjen
3 *Bugfix: conversion of an empty string to a date or hour object
4 *now makes the values of such an object 0 (null) instead of giving
6 *The class UTC combines the date and hour classes. The most basic
7 *functions of the UTC class are now implemented.
8 *These include constructors and conversion to and from String objects.
9 *New functions: date::proper(), hour::proper() and UTC::proper().
10 *Return true if the object holds a proper clock time and/or calendar
11 *date; false if at least one value is out of range.
13 *Revision 1.1 2002/07/25 08:01:26 arjen
14 *First checkin, AXE release 0.2
16 * Revision 1.1 84/09/01 15:01:14 wales
19 * Copyright (c) 1984 by Richard B. Wales
23 * Lexical analyzer for "parsedate" routine. This lexer was orig-
24 * inally written in LEX, but rewriting it as an ad-hoc routine
25 * resulted in an enormous savings in space and a significant
30 * Called as needed by the YACC parser ("dateyacc.c"). Not intended
31 * to be called from any other routine.
38 * Returns the token number (from the YACC grammar) of the next
39 * token in the input string pointed to by the global variable
40 * "yyinbuf". The global variable "yylval" is set to the lexi-
41 * cal value (if any) of the token. "yyinbuf" is set to point
42 * to the first character in the input string which is not a
43 * part of the token just recognized.
47 * struct wordtable *find_word (word) char *word;
48 * Returns a pointer to the entry in the "wordtable" array cor-
49 * responding to the string "word". If "word" is not found, the
50 * returned value is NULL.
54 * ajs Code added 850314 to allow NUM991231 and NUM99991231.
55 * ajs All added/changed lines contain "ajs" for easy searching.
58 /* AJB, Aug 28 1999: Added month names in Dutch */
61 static char rcsident[] = "$Header: /cvsroot/lib/AXE/src/datelex.c,v 1.2 2002-09-28 06:58:45 arjen Exp $";
67 #include "parsedate.h"
69 /* pointer to the input string */
72 /* "answer" structure */
73 struct parseddate yyans;
75 /* Binary-search word table.
76 * Entries must be sorted in ascending order on "text" value, and the
77 * total number of entries must be one less than a power of 2. "Filler"
78 * entries (with "token" values of -1) are inserted at the beginning and
79 * end of the table to pad it as necessary.
81 #define WORDTABLE_SIZE 127 /* MUST be one less than power of 2 */
82 #define MAX_WORD_LENGTH 20 /* used to weed out overly long words
83 * in "yylex". Must be at least as long
84 * as the longest word in "wordtable",
91 } wordtable[WORDTABLE_SIZE] =
92 {/* text token lexval */
104 "A", STD_ZONE, 60, /* UTC+1h */
105 "ACSST", DST_ZONE, 630, /* Cent. Australia */
106 "ACST", STD_ZONE, 570, /* Cent. Australia */
107 "ADT", DST_ZONE, -180, /* Atlantic (Canada) */
108 "AESST", DST_ZONE, 660, /* E. Australia */
109 "AEST", STD_ZONE, 600, /* E. Australia */
111 "APR", MONTH_NAME, 4,
112 "APRIL", MONTH_NAME, 4,
113 "AST", STD_ZONE, -240, /* Atlantic (Canada) */
114 "AT", 0, 0, /* "at" (throwaway) */
115 "AUG", MONTH_NAME, 8,
116 "AUGUST", MONTH_NAME, 8,
117 "AWSST", DST_ZONE, 540, /* W. Australia */
118 "AWST", STD_ZONE, 480, /* W. Australia */
119 "B", STD_ZONE, 120, /* UTC+2h */
120 "BST", DST_ZONE, 60, /* Great Britain */
121 "C", STD_ZONE, 180, /* UTC+3h */
122 "CDT", DST_ZONE, -300,
123 "CST", STD_ZONE, -360,
124 "D", STD_ZONE, 240, /* UTC+4h */
125 "DEC", MONTH_NAME, 12,
126 "DECEMBER", MONTH_NAME, 12,
127 "DST", DST_SUFFIX, 0,
128 "E", STD_ZONE, 300, /* UTC+5h */
129 "EDT", DST_ZONE, -240,
130 "EET", STD_ZONE, 120, /* Eastern Europe */
131 "EETDST", DST_ZONE, 180, /* Eastern Europe */
132 "EST", STD_ZONE, -300,
133 "F", STD_ZONE, 360, /* UTC+6h */
134 "FEB", MONTH_NAME, 2,
135 "FEBRUARY", MONTH_NAME, 2,
137 "FRIDAY", DAY_NAME, 5,
138 "G", STD_ZONE, 420, /* UTC+7h */
140 "H", STD_ZONE, 480, /* UTC+8h */
141 "HDT", DST_ZONE, -540, /* Hawaii/Alaska */
142 "HST", STD_ZONE, -600, /* Hawaii/Alaska */
143 "I", STD_ZONE, 540, /* UTC+9h */
144 "IST", STD_ZONE, 120, /* Israel */
145 "JAN", MONTH_NAME, 1,
146 "JANUARY", MONTH_NAME, 1,
147 "JUL", MONTH_NAME, 7,
148 "JULY", MONTH_NAME, 7,
149 "JUN", MONTH_NAME, 6,
150 "JUNE", MONTH_NAME, 6,
151 "K", STD_ZONE, 600, /* UTC+10h */
152 "L", STD_ZONE, 660, /* UTC+11h */
153 "M", STD_ZONE, 720, /* UTC+12h */
154 "MAR", MONTH_NAME, 3,
155 "MARCH", MONTH_NAME, 3,
156 "MAY", MONTH_NAME, 5,
157 "MDT", DST_ZONE, -360,
158 "MEI", MONTH_NAME, 5,
159 "MET", STD_ZONE, 60, /* Central Europe */
160 "METDST", DST_ZONE, 120, /* Central Europe */
162 "MONDAY", DAY_NAME, 1,
163 "MRT", MONTH_NAME, 3,
164 "MST", STD_ZONE, -420,
165 "N", STD_ZONE, -60, /* UTC-1h */
166 "NDT", DST_ZONE, -150, /* Nfld. (Canada) */
167 "NOV", MONTH_NAME, 11,
168 "NOVEMBER", MONTH_NAME, 11,
169 "NST", STD_ZONE, -210, /* Nfld. (Canada) */
170 "O", STD_ZONE, -120, /* UTC-2h */
171 "OCT", MONTH_NAME, 10,
172 "OCTOBER", MONTH_NAME, 10,
173 "OKT", MONTH_NAME, 10,
174 "ON", 0, 0, /* "on" (throwaway) */
175 "P", STD_ZONE, -180, /* UTC-3h */
176 "PDT", DST_ZONE, -420,
178 "PST", STD_ZONE, -480,
179 "Q", STD_ZONE, -240, /* UTC-4h */
180 "R", STD_ZONE, -300, /* UTC-5h */
181 "S", STD_ZONE, -360, /* UTC-6h */
183 "SATURDAY", DAY_NAME, 6,
184 "SEP", MONTH_NAME, 9,
185 "SEPT", MONTH_NAME, 9,
186 "SEPTEMBER", MONTH_NAME, 9,
188 "SUNDAY", DAY_NAME, 0,
189 "T", STD_ZONE, -420, /* UTC-7h */
192 "THURS", DAY_NAME, 4,
193 "THURSDAY", DAY_NAME, 4,
196 "TUESDAY", DAY_NAME, 2,
197 "U", STD_ZONE, -480, /* UTC-8h */
200 "V", STD_ZONE, -540, /* UTC-9h */
201 "W", STD_ZONE, -600, /* UTC-10h */
203 "WEDNESDAY", DAY_NAME, 3,
205 "WET", STD_ZONE, 0, /* Western Europe */
206 "WETDST", DST_ZONE, 60, /* Western Europe */
207 "X", STD_ZONE, -660, /* UTC-11h */
208 "Y", STD_ZONE, -720, /* UTC-12h */
209 "YDT", DST_ZONE, -480, /* Yukon */
210 "YST", STD_ZONE, -540, /* Yukon */
211 "Z", STD_ZONE, 0, /* UTC */
221 static struct wordtable *find_word();
224 * Return the next token for the YACC parser.
228 { static char buffer[MAX_WORD_LENGTH+1];
229 register char *c, *d;
230 register struct wordtable *wt;
231 register int num, ndgts;
234 /* We will return here if an invalid input token is detected. */
235 c = buffer; d = yyinbuf;
237 /* Skip over blanks, tabs, commas, and parentheses. */
242 while (*c != '\0' && (*c == ' ' || *c == '\t' || *c == ','
243 || *c == '(' || *c == ')'));
245 /* A zero (null) byte signals the end of the input. */
247 { yyinbuf = --d; /* stay put on the null */
251 /* Process a word (looking it up in "wordtable"). */
252 if ((*c >= 'A' && *c <= 'Z') || (*c >= 'a' && *c <= 'z'))
253 { if (*c >= 'a' && *c <= 'z') *c += 'A' - 'a';
254 while (c < buffer + MAX_WORD_LENGTH
255 && ((*d >= 'A' && *d <= 'Z')
256 || (*d >= 'a' && *d <= 'z')))
258 if (*c >= 'a' && *c <= 'z') *c += 'A' - 'a';
260 if ((*d >= 'A' && *d <= 'Z') || (*d >= 'a' && *d <= 'z'))
261 { /* Word is too long (over MAX_WORD_LENGTH characters). */
262 do { d++; } while ((*d >= 'A' && *d <= 'Z')
263 || (*d >= 'a' && *d <= 'z'));
267 *++c = 0; yyinbuf = d;
268 if ((wt = find_word (buffer)) == NULL) goto error;
269 if (wt->token == 0) goto restart; /* ignore this word */
270 yylval.IntVal = wt->lexval;
274 /* Process a number. */
275 if (*c >= '0' && *c <= '9')
276 { num = *c - '0'; ndgts = 1;
277 for (ndgts = 1; ndgts < 8 && *d >= '0' && *d <= '9'; ndgts++) /* ajs */
278 num = 10*num + (*d++ - '0');
279 if (*d >= '0' && *d <= '9')
280 { /* Number is too long (over 8 digits). */ /* ajs */
281 do { d++; } while (*d >= '0' && *d <= '9');
288 { case 1: return NUM9;
289 case 2: if (num <= 23) return NUM23;
290 if (num <= 59) return NUM59;
291 /*otherwise*/ return NUM99;
293 case 4: if (num/100 <= 23 && num%100 <= 59) return NUM2359;
294 /*otherwise*/ return NUM9999;
296 case 6: if (num/10000 <= 23
297 && (num%10000)/100 <= 59
300 if ((((num % 10000) / 100) <= 12) /* ajs */
301 && ((num % 100) <= 31)) /* ajs */
302 return NUM991231; /* ajs */
303 /*otherwise*/ return NUM999999;
305 case 8: if ((((num % 10000) / 100) <= 12) /* ajs */
306 && ((num % 100) <= 31)) /* ajs */
307 return NUM99991231; /* ajs */
308 goto error; /* ajs */
312 /* Pass back the following delimiter tokens verbatim. */
313 if (*c == '-' || *c == '+' || *c == '/' || *c == ':' || *c == '.')
319 /* An unidentified character was found in the input. */
321 if (yyans.error == NULL) yyans.error = yyinbuf;
325 /* struct wordtable *find_word (word) char *word;
326 * Look up a word in the "wordtable" array via a binary search.
332 { register int low, mid, high;
333 register int comparison;
336 high = WORDTABLE_SIZE;
338 { mid = (low + high) / 2;
339 comparison = strcmp (wordtable[mid].text, word);
340 if (comparison == 0) return wordtable+mid;
341 if (comparison > 0) high = mid;