Context Navigation

ejlex.c @ 7fcc26ac

4.104.114.84.95

Last change on this file since 7fcc26ac was a6b4c0df, checked in by Joel Sherrill <joel.sherrill@…>, on 09/01/00 at 10:57:21

2000-08-30 Joel Sherrill <joel@…>

Merged version 2.1 of the GoAhead webserver. This update was submitted by Antti P Miettinen <antti.p.miettinen@…>.
NOTES, base64.c, ejIntrn.h, emfdb.c, emfdb.h, md5.h, md5c.c, um.c, um.h: New files.
wbase64.c: Removed.
Makefile.am, asp.c, balloc.c, default.c, ej.h, ejlex.c, ejparse.c, form.c, h.c, handler.c, mime.c, misc.c, ringq.c, rom.c, security.c, socket.c, sym.c, uemf.c, uemf.h, url.c, value.c, webcomp.c, webmain.c, webpage.c, webrom.c, webs.c, webs.h, websuemf.c, wsIntrn.h: Modified.

Property mode set to 100644

File size: 13.9 KB

Line
1	/*
2	* ejlex.c -- Ejscript(TM) Lexical Analyser
3	*
4	* Copyright (c) GoAhead Software Inc., 1995-2000. All Rights Reserved.
5	*
6	* See the file "license.txt" for usage and redistribution license requirements
7	*/
8
9	/****************************** Description *******************************/
10
11	/*
12	* Ejscript lexical analyser. This implements a lexical analyser for
13	* a subset of the JavaScript language.
14	*/
15
16	/******************************** Includes ********************************/
17
18	#include "ejIntrn.h"
19
20	#if UEMF
21	#include "uemf.h"
22	#else
23	#include "basic/basicInternal.h"
24	#endif
25
26	/******************************** Defines *********************************/
27	#define OCTAL 8	/* radix passed to charConvert() for \ooo string escapes */
28	#define HEX 16	/* radix passed to charConvert() for \x and \u string escapes */
29	/**************************** Forward Declarations ************************/
30
31	static int getLexicalToken(ej_t* ep, int state);	/* scan the next token from ep->input */
32	static int tokenAddChar(ej_t *ep, int c);	/* append c to the token buffer; -1 on failure */
33	static int inputGetc(ej_t* ep);	/* next script character, or -1 when the script is exhausted */
34	static void inputPutback(ej_t* ep, int c);	/* push c back onto the front of the script queue */
35	static int charConvert(ej_t* ep, int base, int maxDig);	/* read up to maxDig digits in the given base */
36
37	/*********************************** Code *********************************/
38	/*
39	* Setup the lexical analyser
40	*/
41
42	int ejLexOpen(ej_t* ep)
43	{
44	return 0;
45	}
46
47	/******************************************************************************/
48	/*
49	* Close the lexicial analyser
50	*/
51
52	void ejLexClose(ej_t* ep)
53	{
54	}
55
56	/******************************************************************************/
57	/*
58	* Open a new input script
59	*/
60
61	int ejLexOpenScript(ej_t* ep, char_t *script)
62	{
63	ejinput_t *ip;
64
65	a_assert(ep);
66	a_assert(script);
67
68	if ((ep->input = balloc(B_L, sizeof(ejinput_t))) == NULL) {
69	return -1;
70	}
71	ip = ep->input;
72	memset(ip, 0, sizeof(*ip));
73
74	a_assert(ip);
75	a_assert(ip->putBackToken == NULL);
76	a_assert(ip->putBackTokenId == 0);
77
78	/*
79	* Create the parse token buffer and script buffer
80	*/
81	if (ringqOpen(&ip->tokbuf, EJ_INC, -1) < 0) {
82	return -1;
83	}
84	if (ringqOpen(&ip->script, EJ_SCRIPT_INC, -1) < 0) {
85	return -1;
86	}
87	/*
88	* Put the Ejscript into a ring queue for easy parsing
89	*/
90	ringqPutStr(&ip->script, script);
91
92	ip->lineNumber = 1;
93	ip->lineLength = 0;
94	ip->lineColumn = 0;
95	ip->line = NULL;
96
97	return 0;
98	}
99
100	/******************************************************************************/
101	/*
102	* Close the input script
103	*/
104
105	void ejLexCloseScript(ej_t* ep)
106	{
107	ejinput_t *ip;
108
109	a_assert(ep);
110
111	ip = ep->input;
112	a_assert(ip);
113
114	if (ip->putBackToken) {
115	bfree(B_L, ip->putBackToken);
116	ip->putBackToken = NULL;
117	}
118	ip->putBackTokenId = 0;
119
120	if (ip->line) {
121	bfree(B_L, ip->line);
122	ip->line = NULL;
123	}
124
125	ringqClose(&ip->tokbuf);
126	ringqClose(&ip->script);
127
128	bfree(B_L, ip);
129	}
130
131	/******************************************************************************/
132	/*
133	* Save the input state
134	*/
135
136	void ejLexSaveInputState(ej_t* ep, ejinput_t* state)
137	{
138	ejinput_t *ip;
139
140	a_assert(ep);
141
142	ip = ep->input;
143	a_assert(ip);
144
145	state = ip;
146	if (ip->putBackToken) {
147	state->putBackToken = bstrdup(B_L, ip->putBackToken);
148	}
149	}
150
151	/******************************************************************************/
152	/*
153	* Restore the input state
154	*/
155
156	void ejLexRestoreInputState(ej_t* ep, ejinput_t* state)
157	{
158	ejinput_t *ip;
159
160	a_assert(ep);
161
162	ip = ep->input;
163	a_assert(ip);
164
165	ip->tokbuf = state->tokbuf;
166	ip->script = state->script;
167	ip->putBackTokenId = state->putBackTokenId;
168	if (ip->putBackToken) {
169	bfree(B_L, ip->putBackToken);
170	}
171	if (state->putBackToken) {
172	ip->putBackToken = bstrdup(B_L, state->putBackToken);
173	}
174	}
175
176	/******************************************************************************/
177	/*
178	* Free a saved input state
179	*/
180
181	void ejLexFreeInputState(ej_t* ep, ejinput_t* state)
182	{
183	if (state->putBackToken) {
184	bfree(B_L, state->putBackToken);
185	state->putBackToken = NULL;
186	}
187	}
188
189	/******************************************************************************/
190	/*
191	* Get the next Ejscript token
192	*/
193
194	int ejLexGetToken(ej_t* ep, int state)
195	{
196	ep->tid = getLexicalToken(ep, state);
197	trace(9, T("ejGetToken: %d, \"%s\"\n"), ep->tid, ep->token);
198	return ep->tid;
199	}
200
201	/******************************************************************************/
202	/*
203	* Get the next Ejscript token
204	*/
205
206	static int getLexicalToken(ej_t* ep, int state)
207	{
208	ringq_t inq, tokq;
209	ejinput_t* ip;
210	int done, tid, c, quote, style;
211
212	a_assert(ep);
213	ip = ep->input;
214	a_assert(ip);
215
216	inq = &ip->script;
217	tokq = &ip->tokbuf;
218
219	ep->tid = -1;
220	tid = -1;
221	ep->token = T("");
222
223	ringqFlush(tokq);
224
225	if (ip->putBackTokenId > 0) {
226	ringqPutStr(tokq, ip->putBackToken);
227	tid = ip->putBackTokenId;
228	ip->putBackTokenId = 0;
229	ep->token = (char_t*) tokq->servp;
230	return tid;
231	}
232
233	if ((c = inputGetc(ep)) < 0) {
234	return TOK_EOF;
235	}
236
237	for (done = 0; !done; ) {
238	switch (c) {
239	case -1:
240	return TOK_EOF;
241
242	case ' ':
243	case '\t':
244	case '\r':
245	do {
246	if ((c = inputGetc(ep)) < 0)
247	break;
248	} while (c == ' ' \|\| c == '\t' \|\| c == '\r');
249	break;
250
251	case '\n':
252	return TOK_NEWLINE;
253
254	case '(':
255	tokenAddChar(ep, c);
256	return TOK_LPAREN;
257
258	case ')':
259	tokenAddChar(ep, c);
260	return TOK_RPAREN;
261
262	case '{':
263	tokenAddChar(ep, c);
264	return TOK_LBRACE;
265
266	case '}':
267	tokenAddChar(ep, c);
268	return TOK_RBRACE;
269
270	case '+':
271	if ((c = inputGetc(ep)) < 0) {
272	ejError(ep, T("Syntax Error"));
273	return TOK_ERR;
274	}
275	if (c != '+' ) {
276	inputPutback(ep, c);
277	tokenAddChar(ep, EXPR_PLUS);
278	return TOK_EXPR;
279	}
280	tokenAddChar(ep, EXPR_INC);
281	return TOK_INC_DEC;
282
283	case '-':
284	if ((c = inputGetc(ep)) < 0) {
285	ejError(ep, T("Syntax Error"));
286	return TOK_ERR;
287	}
288	if (c != '-' ) {
289	inputPutback(ep, c);
290	tokenAddChar(ep, EXPR_MINUS);
291	return TOK_EXPR;
292	}
293	tokenAddChar(ep, EXPR_DEC);
294	return TOK_INC_DEC;
295
296	case '*':
297	tokenAddChar(ep, EXPR_MUL);
298	return TOK_EXPR;
299
300	case '%':
301	tokenAddChar(ep, EXPR_MOD);
302	return TOK_EXPR;
303
304	case '/':
305	/*
306	* Handle the division operator and comments
307	*/
308	if ((c = inputGetc(ep)) < 0) {
309	ejError(ep, T("Syntax Error"));
310	return TOK_ERR;
311	}
312	if (c != '*' && c != '/') {
313	inputPutback(ep, c);
314	tokenAddChar(ep, EXPR_DIV);
315	return TOK_EXPR;
316	}
317	style = c;
318	/*
319	* Eat comments. Both C and C++ comment styles are supported.
320	*/
321	while (1) {
322	if ((c = inputGetc(ep)) < 0) {
323	ejError(ep, T("Syntax Error"));
324	return TOK_ERR;
325	}
326	if (c == '\n' && style == '/') {
327	break;
328	} else if (c == '*') {
329	c = inputGetc(ep);
330	if (style == '/') {
331	if (c == '\n') {
332	break;
333	}
334	} else {
335	if (c == '/') {
336	break;
337	}
338	}
339	}
340	}
341	/*
342	* Continue looking for a token, so get the next character
343	*/
344	if ((c = inputGetc(ep)) < 0) {
345	return TOK_EOF;
346	}
347	break;
348
349	case '<': /* < and <= */
350	if ((c = inputGetc(ep)) < 0) {
351	ejError(ep, T("Syntax Error"));
352	return TOK_ERR;
353	}
354	if (c == '<') {
355	tokenAddChar(ep, EXPR_LSHIFT);
356	return TOK_EXPR;
357	} else if (c == '=') {
358	tokenAddChar(ep, EXPR_LESSEQ);
359	return TOK_EXPR;
360	}
361	tokenAddChar(ep, EXPR_LESS);
362	inputPutback(ep, c);
363	return TOK_EXPR;
364
365	case '>': /* > and >= */
366	if ((c = inputGetc(ep)) < 0) {
367	ejError(ep, T("Syntax Error"));
368	return TOK_ERR;
369	}
370	if (c == '>') {
371	tokenAddChar(ep, EXPR_RSHIFT);
372	return TOK_EXPR;
373	} else if (c == '=') {
374	tokenAddChar(ep, EXPR_GREATEREQ);
375	return TOK_EXPR;
376	}
377	tokenAddChar(ep, EXPR_GREATER);
378	inputPutback(ep, c);
379	return TOK_EXPR;
380
381	case '=': /* "==" */
382	if ((c = inputGetc(ep)) < 0) {
383	ejError(ep, T("Syntax Error"));
384	return TOK_ERR;
385	}
386	if (c == '=') {
387	tokenAddChar(ep, EXPR_EQ);
388	return TOK_EXPR;
389	}
390	inputPutback(ep, c);
391	return TOK_ASSIGNMENT;
392
393	case '!': /* "!=" or "!"*/
394	if ((c = inputGetc(ep)) < 0) {
395	ejError(ep, T("Syntax Error"));
396	return TOK_ERR;
397	}
398	if (c == '=') {
399	tokenAddChar(ep, EXPR_NOTEQ);
400	return TOK_EXPR;
401	}
402	inputPutback(ep, c);
403	tokenAddChar(ep, EXPR_BOOL_COMP);
404	return TOK_EXPR;
405
406	case ';':
407	tokenAddChar(ep, c);
408	return TOK_SEMI;
409
410	case ',':
411	tokenAddChar(ep, c);
412	return TOK_COMMA;
413
414	case '\|': /* "\|\|" */
415	if ((c = inputGetc(ep)) < 0 \|\| c != '\|') {
416	ejError(ep, T("Syntax Error"));
417	return TOK_ERR;
418	}
419	tokenAddChar(ep, COND_OR);
420	return TOK_LOGICAL;
421
422	case '&': /* "&&" */
423	if ((c = inputGetc(ep)) < 0 \|\| c != '&') {
424	ejError(ep, T("Syntax Error"));
425	return TOK_ERR;
426	}
427	tokenAddChar(ep, COND_AND);
428	return TOK_LOGICAL;
429
430	case '\"': /* String quote */
431	case '\'':
432	quote = c;
433	if ((c = inputGetc(ep)) < 0) {
434	ejError(ep, T("Syntax Error"));
435	return TOK_ERR;
436	}
437
438	while (c != quote) {
439	/*
440	* check for escape sequence characters
441	*/
442	if (c == '\\') {
443	c = inputGetc(ep);
444
445	if (gisdigit(c)) {
446	/*
447	* octal support, \101 maps to 65 = 'A'. put first char
448	* back so converter will work properly.
449	*/
450	inputPutback(ep, c);
451	c = charConvert(ep, OCTAL, 3);
452
453	} else {
454	switch (c) {
455	case 'n':
456	c = '\n'; break;
457	case 'b':
458	c = '\b'; break;
459	case 'f':
460	c = '\f'; break;
461	case 'r':
462	c = '\r'; break;
463	case 't':
464	c = '\t'; break;
465	case 'x':
466	/*
467	* hex support, \x41 maps to 65 = 'A'
468	*/
469	c = charConvert(ep, HEX, 2);
470	break;
471	case 'u':
472	/*
473	* unicode support, \x0401 maps to 65 = 'A'
474	*/
475	c = charConvert(ep, HEX, 2);
476	c = c*16 + charConvert(ep, HEX, 2);
477
478	break;
479	case '\'':
480	case '\"':
481	case '\\':
482	break;
483	default:
484	ejError(ep, T("Invalid Escape Sequence"));
485	return TOK_ERR;
486	}
487	}
488	if (tokenAddChar(ep, c) < 0) {
489	return TOK_ERR;
490	}
491	} else {
492	if (tokenAddChar(ep, c) < 0) {
493	return TOK_ERR;
494	}
495	}
496	if ((c = inputGetc(ep)) < 0) {
497	ejError(ep, T("Unmatched Quote"));
498	return TOK_ERR;
499	}
500	}
501	return TOK_LITERAL;
502
503	case '0': case '1': case '2': case '3': case '4':
504	case '5': case '6': case '7': case '8': case '9':
505	do {
506	if (tokenAddChar(ep, c) < 0) {
507	return TOK_ERR;
508	}
509	if ((c = inputGetc(ep)) < 0)
510	break;
511	} while (gisdigit(c));
512	inputPutback(ep, c);
513	return TOK_LITERAL;
514
515	default:
516	/*
517	* Identifiers or a function names
518	*/
519	while (1) {
520	if (c == '\\') {
521	/*
522	* just ignore any \ characters.
523	*/
524	} else if (tokenAddChar(ep, c) < 0) {
525	break;
526	}
527	if ((c = inputGetc(ep)) < 0) {
528	break;
529	}
530	if (!gisalnum(c) && c != '$' && c != '_' &&
531	c != '\\') {
532	break;
533	}
534	}
535	if (! gisalpha(tokq->servp) && tokq->servp != '$' &&
536	*tokq->servp != '_') {
537	ejError(ep, T("Invalid identifier %s"), tokq->servp);
538	return TOK_ERR;
539	}
540	/*
541	* Check for reserved words (only "if", "else", "var", "for"
542	* and "return" at the moment)
543	*/
544	if (state == STATE_STMT) {
545	if (gstrcmp(ep->token, T("if")) == 0) {
546	return TOK_IF;
547	} else if (gstrcmp(ep->token, T("else")) == 0) {
548	return TOK_ELSE;
549	} else if (gstrcmp(ep->token, T("var")) == 0) {
550	return TOK_VAR;
551	} else if (gstrcmp(ep->token, T("for")) == 0) {
552	return TOK_FOR;
553	} else if (gstrcmp(ep->token, T("return")) == 0) {
554	if ((c == ';') \|\| (c == '(')) {
555	inputPutback(ep, c);
556	}
557	return TOK_RETURN;
558	}
559	}
560
561	/*
562	* Skip white space after token to find out whether this is
563	* a function or not.
564	*/
565	while (c == ' ' \|\| c == '\t' \|\| c == '\r' \|\| c == '\n') {
566	if ((c = inputGetc(ep)) < 0)
567	break;
568	}
569
570	tid = (c == '(') ? TOK_FUNCTION : TOK_ID;
571	done++;
572	}
573	}
574
575	/*
576	* Putback the last extra character for next time
577	*/
578	inputPutback(ep, c);
579	return tid;
580	}
581
582	/******************************************************************************/
583	/*
584	* Putback the last token read
585	*/
586
587	void ejLexPutbackToken(ej_t* ep, int tid, char_t *string)
588	{
589	ejinput_t* ip;
590
591	a_assert(ep);
592	ip = ep->input;
593	a_assert(ip);
594
595	if (ip->putBackToken) {
596	bfree(B_L, ip->putBackToken);
597	}
598	ip->putBackTokenId = tid;
599	ip->putBackToken = bstrdup(B_L, string);
600	}
601
602	/******************************************************************************/
603	/*
604	* Add a character to the token ringq buffer
605	*/
606
607	static int tokenAddChar(ej_t *ep, int c)
608	{
609	ejinput_t* ip;
610
611	a_assert(ep);
612	ip = ep->input;
613	a_assert(ip);
614
615	if (ringqPutc(&ip->tokbuf, (char_t) c) < 0) {
616	ejError(ep, T("Token too big"));
617	return -1;
618	}
619	* ((char_t*) ip->tokbuf.endp) = '\0';
620	ep->token = (char_t*) ip->tokbuf.servp;
621
622	return 0;
623	}
624
625	/******************************************************************************/
626	/*
627	* Get another input character
628	*/
629
630	static int inputGetc(ej_t* ep)
631	{
632	ejinput_t *ip;
633	int c, len;
634
635	a_assert(ep);
636	ip = ep->input;
637
638	if ((len = ringqLen(&ip->script)) == 0) {
639	return -1;
640	}
641
642	c = ringqGetc(&ip->script);
643
644	if (c == '\n') {
645	ip->lineNumber++;
646	ip->lineColumn = 0;
647	} else {
648	if ((ip->lineColumn + 2) >= ip->lineLength) {
649	ip->lineLength += EJ_INC;
650	ip->line = brealloc(B_L, ip->line, ip->lineLength * sizeof(char_t));
651	}
652	ip->line[ip->lineColumn++] = c;
653	ip->line[ip->lineColumn] = '\0';
654	}
655	return c;
656	}
657
658	/******************************************************************************/
659	/*
660	* Putback a character onto the input queue
661	*/
662
663	static void inputPutback(ej_t* ep, int c)
664	{
665	ejinput_t *ip;
666
667	a_assert(ep);
668
669	ip = ep->input;
670	ringqInsertc(&ip->script, (char_t) c);
671	ip->lineColumn--;
672	ip->line[ip->lineColumn] = '\0';
673	}
674
675	/******************************************************************************/
676	/*
677	* Convert a hex or octal character back to binary, return original char if
678	* not a hex digit
679	*/
680
681	static int charConvert(ej_t* ep, int base, int maxDig)
682	{
683	int i, c, lval, convChar;
684
685	lval = 0;
686	for (i = 0; i < maxDig; i++) {
687	if ((c = inputGetc(ep)) < 0) {
688	break;
689	}
690	/*
691	* Initialize to out of range value
692	*/
693	convChar = base;
694	if (gisdigit(c)) {
695	convChar = c - '0';
696	} else if (c >= 'a' && c <= 'f') {
697	convChar = c - 'a' + 10;
698	} else if (c >= 'A' && c <= 'F') {
699	convChar = c - 'A' + 10;
700	}
701	/*
702	* if unexpected character then return it to buffer.
703	*/
704	if (convChar >= base) {
705	inputPutback(ep, c);
706	break;
707	}
708	lval = (lval * base) + convChar;
709	}
710	return lval;
711	}
712
713	/******************************************************************************/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: rtems/cpukit/httpd/ejlex.c @ 7fcc26ac

Download in other formats: