2 /*Revision 1.2 2002-09-28 06:58:45 arjen
3 /*Bugfix: conversion of an empty string to a date or hour object
4 /*now makes the values of such an object 0 (null) instead of giving
5 /*a segmentation fault.
6 /*The class UTC combines the date and hour classes. The most basic
7 /*functions of the UTC class are now implemented.
8 /*These include constructors and conversion to and from String objects.
9 /*New functions: date::proper(), hour::proper() and UTC::proper().
10 /*Return true if the object holds a proper clock time and/or calendar
11 /*date; false if at least one value is out of range.
13 *Revision 1.1 2002/07/25 08:01:26 arjen
14 *First checkin, AXE release 0.2
16 * Revision 1.1 84/09/01 15:01:14 wales
19 * Copyright (c) 1984 by Richard B. Wales
23 * Lexical analyzer for "parsedate" routine. This lexer was orig-
24 * inally written in LEX, but rewriting it as an ad-hoc routine
25 * resulted in an enormous savings in space and a significant
30 * Called as needed by the YACC parser ("dateyacc.c"). Not intended
31 * to be called from any other routine.
38 * Returns the token number (from the YACC grammar) of the next
39 * token in the input string pointed to by the global variable
40 * "yyinbuf". The global variable "yylval" is set to the lexi-
41 * cal value (if any) of the token. "yyinbuf" is set to point
42 * to the first character in the input string which is not a
43 * part of the token just recognized.
47 * struct wordtable *find_word (word) char *word;
48 * Returns a pointer to the entry in the "wordtable" array cor-
49 * responding to the string "word". If "word" is not found, the
50 * returned value is NULL.
54 * ajs Code added 850314 to allow NUM991231 and NUM99991231.
55 * ajs All added/changed lines contain "ajs" for easy searching.
58 /* AJB, Aug 28 1999: Added month names in Dutch */
61 static char rcsident[] = "$Header: /cvsroot/lib/AXE/src/datelex.c,v 1.2 2002-09-28 06:58:45 arjen Exp $";
66 #include "parsedate.h"
68 /* pointer to the input string */
71 /* "answer" structure */
72 struct parseddate yyans;
74 /* Binary-search word table.
75 * Entries must be sorted in ascending order on "text" value, and the
76 * total number of entries must be one less than a power of 2. "Filler"
77 * entries (with "token" values of -1) are inserted at the beginning and
78 * end of the table to pad it as necessary.
80 #define WORDTABLE_SIZE 127 /* MUST be one less than power of 2 */
81 #define MAX_WORD_LENGTH 20 /* used to weed out overly long words
82 * in "yylex". Must be at least as long
83 * as the longest word in "wordtable",
90 } wordtable[WORDTABLE_SIZE] =
91 {/* text token lexval */
103 "A", STD_ZONE, 60, /* UTC+1h */
104 "ACSST", DST_ZONE, 630, /* Cent. Australia */
105 "ACST", STD_ZONE, 570, /* Cent. Australia */
106 "ADT", DST_ZONE, -180, /* Atlantic (Canada) */
107 "AESST", DST_ZONE, 660, /* E. Australia */
108 "AEST", STD_ZONE, 600, /* E. Australia */
110 "APR", MONTH_NAME, 4,
111 "APRIL", MONTH_NAME, 4,
112 "AST", STD_ZONE, -240, /* Atlantic (Canada) */
113 "AT", 0, 0, /* "at" (throwaway) */
114 "AUG", MONTH_NAME, 8,
115 "AUGUST", MONTH_NAME, 8,
116 "AWSST", DST_ZONE, 540, /* W. Australia */
117 "AWST", STD_ZONE, 480, /* W. Australia */
118 "B", STD_ZONE, 120, /* UTC+2h */
119 "BST", DST_ZONE, 60, /* Great Britain */
120 "C", STD_ZONE, 180, /* UTC+3h */
121 "CDT", DST_ZONE, -300,
122 "CST", STD_ZONE, -360,
123 "D", STD_ZONE, 240, /* UTC+4h */
124 "DEC", MONTH_NAME, 12,
125 "DECEMBER", MONTH_NAME, 12,
126 "DST", DST_SUFFIX, 0,
127 "E", STD_ZONE, 300, /* UTC+5h */
128 "EDT", DST_ZONE, -240,
129 "EET", STD_ZONE, 120, /* Eastern Europe */
130 "EETDST", DST_ZONE, 180, /* Eastern Europe */
131 "EST", STD_ZONE, -300,
132 "F", STD_ZONE, 360, /* UTC+6h */
133 "FEB", MONTH_NAME, 2,
134 "FEBRUARY", MONTH_NAME, 2,
136 "FRIDAY", DAY_NAME, 5,
137 "G", STD_ZONE, 420, /* UTC+7h */
139 "H", STD_ZONE, 480, /* UTC+8h */
140 "HDT", DST_ZONE, -540, /* Hawaii/Alaska */
141 "HST", STD_ZONE, -600, /* Hawaii/Alaska */
142 "I", STD_ZONE, 540, /* UTC+9h */
143 "IST", STD_ZONE, 120, /* Israel */
144 "JAN", MONTH_NAME, 1,
145 "JANUARY", MONTH_NAME, 1,
146 "JUL", MONTH_NAME, 7,
147 "JULY", MONTH_NAME, 7,
148 "JUN", MONTH_NAME, 6,
149 "JUNE", MONTH_NAME, 6,
150 "K", STD_ZONE, 600, /* UTC+10h */
151 "L", STD_ZONE, 660, /* UTC+11h */
152 "M", STD_ZONE, 720, /* UTC+12h */
153 "MAR", MONTH_NAME, 3,
154 "MARCH", MONTH_NAME, 3,
155 "MAY", MONTH_NAME, 5,
156 "MDT", DST_ZONE, -360,
157 "MEI", MONTH_NAME, 5,
158 "MET", STD_ZONE, 60, /* Central Europe */
159 "METDST", DST_ZONE, 120, /* Central Europe */
161 "MONDAY", DAY_NAME, 1,
162 "MRT", MONTH_NAME, 3,
163 "MST", STD_ZONE, -420,
164 "N", STD_ZONE, -60, /* UTC-1h */
165 "NDT", DST_ZONE, -150, /* Nfld. (Canada) */
166 "NOV", MONTH_NAME, 11,
167 "NOVEMBER", MONTH_NAME, 11,
168 "NST", STD_ZONE, -210, /* Nfld. (Canada) */
169 "O", STD_ZONE, -120, /* UTC-2h */
170 "OCT", MONTH_NAME, 10,
171 "OCTOBER", MONTH_NAME, 10,
172 "OKT", MONTH_NAME, 10,
173 "ON", 0, 0, /* "on" (throwaway) */
174 "P", STD_ZONE, -180, /* UTC-3h */
175 "PDT", DST_ZONE, -420,
177 "PST", STD_ZONE, -480,
178 "Q", STD_ZONE, -240, /* UTC-4h */
179 "R", STD_ZONE, -300, /* UTC-5h */
180 "S", STD_ZONE, -360, /* UTC-6h */
182 "SATURDAY", DAY_NAME, 6,
183 "SEP", MONTH_NAME, 9,
184 "SEPT", MONTH_NAME, 9,
185 "SEPTEMBER", MONTH_NAME, 9,
187 "SUNDAY", DAY_NAME, 0,
188 "T", STD_ZONE, -420, /* UTC-7h */
191 "THURS", DAY_NAME, 4,
192 "THURSDAY", DAY_NAME, 4,
195 "TUESDAY", DAY_NAME, 2,
196 "U", STD_ZONE, -480, /* UTC-8h */
199 "V", STD_ZONE, -540, /* UTC-9h */
200 "W", STD_ZONE, -600, /* UTC-10h */
202 "WEDNESDAY", DAY_NAME, 3,
204 "WET", STD_ZONE, 0, /* Western Europe */
205 "WETDST", DST_ZONE, 60, /* Western Europe */
206 "X", STD_ZONE, -660, /* UTC-11h */
207 "Y", STD_ZONE, -720, /* UTC-12h */
208 "YDT", DST_ZONE, -480, /* Yukon */
209 "YST", STD_ZONE, -540, /* Yukon */
210 "Z", STD_ZONE, 0, /* UTC */
220 static struct wordtable *find_word();
223 * Return the next token for the YACC parser.
227 { static char buffer[MAX_WORD_LENGTH+1];
228 register char *c, *d;
229 register struct wordtable *wt;
230 register int num, ndgts;
233 /* We will return here (via "goto restart") after skipping a throwaway word. */
234 c = buffer; d = yyinbuf;
236 /* Skip over blanks, tabs, commas, and parentheses. */
241 while (*c != '\0' && (*c == ' ' || *c == '\t' || *c == ','
242 || *c == '(' || *c == ')'));
244 /* A zero (null) byte signals the end of the input. */
246 { yyinbuf = --d; /* stay put on the null */
250 /* Process a word (looking it up in "wordtable"). */
251 if ((*c >= 'A' && *c <= 'Z') || (*c >= 'a' && *c <= 'z'))
252 { if (*c >= 'a' && *c <= 'z') *c += 'A' - 'a';
253 while (c < buffer + MAX_WORD_LENGTH
254 && ((*d >= 'A' && *d <= 'Z')
255 || (*d >= 'a' && *d <= 'z')))
257 if (*c >= 'a' && *c <= 'z') *c += 'A' - 'a';
259 if ((*d >= 'A' && *d <= 'Z') || (*d >= 'a' && *d <= 'z'))
260 { /* Word is too long (over MAX_WORD_LENGTH characters). */
261 do { d++; } while ((*d >= 'A' && *d <= 'Z')
262 || (*d >= 'a' && *d <= 'z'));
266 *++c = 0; yyinbuf = d;
267 if ((wt = find_word (buffer)) == NULL) goto error;
268 if (wt->token == 0) goto restart; /* ignore this word */
269 yylval.IntVal = wt->lexval;
273 /* Process a number. */
274 if (*c >= '0' && *c <= '9')
275 { num = *c - '0'; ndgts = 1;
276 for (ndgts = 1; ndgts < 8 && *d >= '0' && *d <= '9'; ndgts++) /* ajs */
277 num = 10*num + (*d++ - '0');
278 if (*d >= '0' && *d <= '9')
279 { /* Number is too long (over 8 digits). */ /* ajs */
280 do { d++; } while (*d >= '0' && *d <= '9');
287 { case 1: return NUM9;
288 case 2: if (num <= 23) return NUM23;
289 if (num <= 59) return NUM59;
290 /*otherwise*/ return NUM99;
292 case 4: if (num/100 <= 23 && num%100 <= 59) return NUM2359;
293 /*otherwise*/ return NUM9999;
295 case 6: if (num/10000 <= 23
296 && (num%10000)/100 <= 59
299 if ((((num % 10000) / 100) <= 12) /* ajs */
300 && ((num % 100) <= 31)) /* ajs */
301 return NUM991231; /* ajs */
303 case 8: if ((((num % 10000) / 100) <= 12) /* ajs */
304 && ((num % 100) <= 31)) /* ajs */
305 return NUM99991231; /* ajs */
306 goto error; /* ajs */
310 /* Pass back the following delimiter tokens verbatim. */
311 if (*c == '-' || *c == '+' || *c == '/' || *c == ':' || *c == '.')
317 /* An unidentified character was found in the input. */
319 if (yyans.error == NULL) yyans.error = yyinbuf;
323 /* struct wordtable *find_word (word) char *word;
324 * Look up a word in the "wordtable" array via a binary search.
330 { register int low, mid, high;
331 register int comparison;
334 high = WORDTABLE_SIZE;
336 { mid = (low + high) / 2;
337 comparison = strcmp (wordtable[mid].text, word);
338 if (comparison == 0) return wordtable+mid;
339 if (comparison > 0) high = mid;