2 /*Revision 1.1 2002-07-25 08:01:26 arjen
3 /*First checkin, AXE release 0.2
5 * Revision 1.1 84/09/01 15:01:14 wales
8 * Copyright (c) 1984 by Richard B. Wales
12 * Lexical analyzer for the "parsedate" routine. This lexer was
13 * originally written in LEX, but rewriting it as an ad-hoc routine
14 * resulted in enormous savings in space and a significant
19 * Called as needed by the YACC parser ("dateyacc.c"). Not intended
20 * to be called from any other routine.
27 * Returns the token number (from the YACC grammar) of the next
28 * token in the input string pointed to by the global variable
29 * "yyinbuf". The global variable "yylval" is set to the lexical
30 * value (if any) of the token. "yyinbuf" is set to point
31 * to the first character in the input string which is not a
32 * part of the token just recognized.
36 * struct wordtable *find_word (word) char *word;
37 * Returns a pointer to the entry in the "wordtable" array
38 * corresponding to the string "word". If "word" is not found, the
39 * returned value is NULL.
43 * ajs Code added 850314 to allow NUM991231 and NUM99991231.
44 * ajs All added/changed lines contain "ajs" for easy searching.
47 /* AJB, Aug 28 1999: Added month names in Dutch */
50 static char rcsident[] = "$Header: /cvsroot/lib/AXE/src/datelex.c,v 1.1 2002-07-25 08:01:26 arjen Exp $";
55 #include "parsedate.h"
57 /* pointer to the input string */
60 /* "answer" structure */
61 struct parseddate yyans;
63 /* Binary-search word table.
64 * Entries must be sorted in ascending order on "text" value, and the
65 * total number of entries must be one less than a power of 2. "Filler"
66 * entries (with "token" values of -1) are inserted at the beginning and
67 * end of the table to pad it as necessary.
69 #define WORDTABLE_SIZE 127 /* MUST be one less than power of 2 */
70 #define MAX_WORD_LENGTH 20 /* used to weed out overly long words
71 * in "yylex". Must be at least as long
72 * as the longest word in "wordtable",
79 } wordtable[WORDTABLE_SIZE] =
80 {/* text token lexval */
92 "A", STD_ZONE, 60, /* UTC+1h */
93 "ACSST", DST_ZONE, 630, /* Cent. Australia */
94 "ACST", STD_ZONE, 570, /* Cent. Australia */
95 "ADT", DST_ZONE, -180, /* Atlantic (Canada) */
96 "AESST", DST_ZONE, 660, /* E. Australia */
97 "AEST", STD_ZONE, 600, /* E. Australia */
100 "APRIL", MONTH_NAME, 4,
101 "AST", STD_ZONE, -240, /* Atlantic (Canada) */
102 "AT", 0, 0, /* "at" (throwaway) */
103 "AUG", MONTH_NAME, 8,
104 "AUGUST", MONTH_NAME, 8,
105 "AWSST", DST_ZONE, 540, /* W. Australia */
106 "AWST", STD_ZONE, 480, /* W. Australia */
107 "B", STD_ZONE, 120, /* UTC+2h */
108 "BST", DST_ZONE, 60, /* Great Britain */
109 "C", STD_ZONE, 180, /* UTC+3h */
110 "CDT", DST_ZONE, -300,
111 "CST", STD_ZONE, -360,
112 "D", STD_ZONE, 240, /* UTC+4h */
113 "DEC", MONTH_NAME, 12,
114 "DECEMBER", MONTH_NAME, 12,
115 "DST", DST_SUFFIX, 0,
116 "E", STD_ZONE, 300, /* UTC+5h */
117 "EDT", DST_ZONE, -240,
118 "EET", STD_ZONE, 120, /* Eastern Europe */
119 "EETDST", DST_ZONE, 180, /* Eastern Europe */
120 "EST", STD_ZONE, -300,
121 "F", STD_ZONE, 360, /* UTC+6h */
122 "FEB", MONTH_NAME, 2,
123 "FEBRUARY", MONTH_NAME, 2,
125 "FRIDAY", DAY_NAME, 5,
126 "G", STD_ZONE, 420, /* UTC+7h */
128 "H", STD_ZONE, 480, /* UTC+8h */
129 "HDT", DST_ZONE, -540, /* Hawaii/Alaska */
130 "HST", STD_ZONE, -600, /* Hawaii/Alaska */
131 "I", STD_ZONE, 540, /* UTC+9h */
132 "IST", STD_ZONE, 120, /* Israel */
133 "JAN", MONTH_NAME, 1,
134 "JANUARY", MONTH_NAME, 1,
135 "JUL", MONTH_NAME, 7,
136 "JULY", MONTH_NAME, 7,
137 "JUN", MONTH_NAME, 6,
138 "JUNE", MONTH_NAME, 6,
139 "K", STD_ZONE, 600, /* UTC+10h */
140 "L", STD_ZONE, 660, /* UTC+11h */
141 "M", STD_ZONE, 720, /* UTC+12h */
142 "MAR", MONTH_NAME, 3,
143 "MARCH", MONTH_NAME, 3,
144 "MAY", MONTH_NAME, 5,
145 "MDT", DST_ZONE, -360,
146 "MEI", MONTH_NAME, 5,
147 "MET", STD_ZONE, 60, /* Central Europe */
148 "METDST", DST_ZONE, 120, /* Central Europe */
150 "MONDAY", DAY_NAME, 1,
151 "MRT", MONTH_NAME, 3,
152 "MST", STD_ZONE, -420,
153 "N", STD_ZONE, -60, /* UTC-1h */
154 "NDT", DST_ZONE, -150, /* Nfld. (Canada) */
155 "NOV", MONTH_NAME, 11,
156 "NOVEMBER", MONTH_NAME, 11,
157 "NST", STD_ZONE, -210, /* Nfld. (Canada) */
158 "O", STD_ZONE, -120, /* UTC-2h */
159 "OCT", MONTH_NAME, 10,
160 "OCTOBER", MONTH_NAME, 10,
161 "OKT", MONTH_NAME, 10,
162 "ON", 0, 0, /* "on" (throwaway) */
163 "P", STD_ZONE, -180, /* UTC-3h */
164 "PDT", DST_ZONE, -420,
166 "PST", STD_ZONE, -480,
167 "Q", STD_ZONE, -240, /* UTC-4h */
168 "R", STD_ZONE, -300, /* UTC-5h */
169 "S", STD_ZONE, -360, /* UTC-6h */
171 "SATURDAY", DAY_NAME, 6,
172 "SEP", MONTH_NAME, 9,
173 "SEPT", MONTH_NAME, 9,
174 "SEPTEMBER", MONTH_NAME, 9,
176 "SUNDAY", DAY_NAME, 0,
177 "T", STD_ZONE, -420, /* UTC-7h */
180 "THURS", DAY_NAME, 4,
181 "THURSDAY", DAY_NAME, 4,
184 "TUESDAY", DAY_NAME, 2,
185 "U", STD_ZONE, -480, /* UTC-8h */
188 "V", STD_ZONE, -540, /* UTC-9h */
189 "W", STD_ZONE, -600, /* UTC-10h */
191 "WEDNESDAY", DAY_NAME, 3,
193 "WET", STD_ZONE, 0, /* Western Europe */
194 "WETDST", DST_ZONE, 60, /* Western Europe */
195 "X", STD_ZONE, -660, /* UTC-11h */
196 "Y", STD_ZONE, -720, /* UTC-12h */
197 "YDT", DST_ZONE, -480, /* Yukon */
198 "YST", STD_ZONE, -540, /* Yukon */
199 "Z", STD_ZONE, 0, /* UTC */
209 static struct wordtable *find_word();
212 * Return the next token for the YACC parser.
216 { static char buffer[MAX_WORD_LENGTH+1];
217 register char *c, *d;
218 register struct wordtable *wt;
219 register int num, ndgts;
222 /* We will return here if an invalid input token is detected. */
223 c = buffer; d = yyinbuf;
225 /* Skip over blanks, tabs, commas, and parentheses. */
227 while (*c == ' ' || *c == '\t' || *c == ','
228 || *c == '(' || *c == ')');
230 /* A zero (null) byte signals the end of the input. */
232 { yyinbuf = --d; /* stay put on the null */
236 /* Process a word (looking it up in "wordtable"). */
237 if ((*c >= 'A' && *c <= 'Z') || (*c >= 'a' && *c <= 'z'))
238 { if (*c >= 'a' && *c <= 'z') *c += 'A' - 'a';
239 while (c < buffer + MAX_WORD_LENGTH
240 && ((*d >= 'A' && *d <= 'Z')
241 || (*d >= 'a' && *d <= 'z')))
243 if (*c >= 'a' && *c <= 'z') *c += 'A' - 'a';
245 if ((*d >= 'A' && *d <= 'Z') || (*d >= 'a' && *d <= 'z'))
246 { /* Word is too long (over MAX_WORD_LENGTH characters). */
247 do { d++; } while ((*d >= 'A' && *d <= 'Z')
248 || (*d >= 'a' && *d <= 'z'));
252 *++c = 0; yyinbuf = d;
253 if ((wt = find_word (buffer)) == NULL) goto error;
254 if (wt->token == 0) goto restart; /* ignore this word */
255 yylval.IntVal = wt->lexval;
259 /* Process a number. */
260 if (*c >= '0' && *c <= '9')
261 { num = *c - '0'; ndgts = 1;
262 for (ndgts = 1; ndgts < 8 && *d >= '0' && *d <= '9'; ndgts++) /* ajs */
263 num = 10*num + (*d++ - '0');
264 if (*d >= '0' && *d <= '9')
265 { /* Number is too long (over 8 digits). */ /* ajs */
266 do { d++; } while (*d >= '0' && *d <= '9');
273 { case 1: return NUM9;
274 case 2: if (num <= 23) return NUM23;
275 if (num <= 59) return NUM59;
276 /*otherwise*/ return NUM99;
278 case 4: if (num/100 <= 23 && num%100 <= 59) return NUM2359;
279 /*otherwise*/ return NUM9999;
281 case 6: if (num/10000 <= 23
282 && (num%10000)/100 <= 59
285 if ((((num % 10000) / 100) <= 12) /* ajs */
286 && ((num % 100) <= 31)) /* ajs */
287 return NUM991231; /* ajs */
289 case 8: if ((((num % 10000) / 100) <= 12) /* ajs */
290 && ((num % 100) <= 31)) /* ajs */
291 return NUM99991231; /* ajs */
292 goto error; /* ajs */
296 /* Pass back the following delimiter tokens verbatim. */
297 if (*c == '-' || *c == '+' || *c == '/' || *c == ':' || *c == '.')
303 /* An unidentified character was found in the input. */
305 if (yyans.error == NULL) yyans.error = yyinbuf;
309 /* struct wordtable *find_word (word) char *word;
310 * Look up a word in the "wordtable" array via a binary search.
316 { register int low, mid, high;
317 register int comparison;
320 high = WORDTABLE_SIZE;
322 { mid = (low + high) / 2;
323 comparison = strcmp (wordtable[mid].text, word);
324 if (comparison == 0) return wordtable+mid;
325 if (comparison > 0) high = mid;