Context Navigation

source: rtems/cpukit/httpd/ejlex.c @ 0a7278e

4.104.115

Last change on this file since 0a7278e was 0a7278e, checked in by Ralf Corsepius <ralf.corsepius@…>, on 11/29/09 at 13:20:53
Whitespace removal.
Property mode set to `100644`
File size: 14.2 KB

Line
1	/*
2	* ejlex.c -- Ejscript(TM) Lexical Analyser
3	*
4	* Copyright (c) GoAhead Software Inc., 1995-2000. All Rights Reserved.
5	*
6	* See the file "license.txt" for usage and redistribution license requirements
7	*
8	* $Id$
9	*/
10
11	/****************************** Description *******************************/
12
13	/*
14	* Ejscript lexical analyser. This implements a lexical analyser for
15	* a subset of the JavaScript language.
16	*/
17
18	/******************************** Includes ********************************/
19
20	#include "ejIntrn.h"
21
22	#ifdef UEMF
23	#include "uemf.h"
24	#else
25	#include "basic/basicInternal.h"
26	#endif
27
28	/******************************** Defines *********************************/
29	#define OCTAL 8
30	#define HEX 16
31	/**************************** Forward Declarations ************************/
32
33	static int getLexicalToken(ej_t* ep, int state);
34	static int tokenAddChar(ej_t *ep, int c);
35	static int inputGetc(ej_t* ep);
36	static void inputPutback(ej_t* ep, int c);
37	static int charConvert(ej_t* ep, int base, int maxDig);
38
39	/*********************************** Code *********************************/
40	/*
41	* Setup the lexical analyser
42	*/
43
44	int ejLexOpen(ej_t* ep)
45	{
46	return 0;
47	}
48
49	/******************************************************************************/
50	/*
51	* Close the lexicial analyser
52	*/
53
54	void ejLexClose(ej_t* ep)
55	{
56	}
57
58	/******************************************************************************/
59	/*
60	* Open a new input script
61	*/
62
63	int ejLexOpenScript(ej_t* ep, char_t *script)
64	{
65	ejinput_t *ip;
66
67	a_assert(ep);
68	a_assert(script);
69
70	if ((ep->input = balloc(B_L, sizeof(ejinput_t))) == NULL) {
71	return -1;
72	}
73	ip = ep->input;
74	memset(ip, 0, sizeof(*ip));
75
76	a_assert(ip);
77	a_assert(ip->putBackToken == NULL);
78	a_assert(ip->putBackTokenId == 0);
79
80	/*
81	* Create the parse token buffer and script buffer
82	*/
83	if (ringqOpen(&ip->tokbuf, EJ_INC, -1) < 0) {
84	return -1;
85	}
86	if (ringqOpen(&ip->script, EJ_SCRIPT_INC, -1) < 0) {
87	return -1;
88	}
89	/*
90	* Put the Ejscript into a ring queue for easy parsing
91	*/
92	ringqPutStr(&ip->script, script);
93
94	ip->lineNumber = 1;
95	ip->lineLength = 0;
96	ip->lineColumn = 0;
97	ip->line = NULL;
98
99	return 0;
100	}
101
102	/******************************************************************************/
103	/*
104	* Close the input script
105	*/
106
107	void ejLexCloseScript(ej_t* ep)
108	{
109	ejinput_t *ip;
110
111	a_assert(ep);
112
113	ip = ep->input;
114	a_assert(ip);
115
116	if (ip->putBackToken) {
117	bfree(B_L, ip->putBackToken);
118	ip->putBackToken = NULL;
119	}
120	ip->putBackTokenId = 0;
121
122	if (ip->line) {
123	bfree(B_L, ip->line);
124	ip->line = NULL;
125	}
126
127	ringqClose(&ip->tokbuf);
128	ringqClose(&ip->script);
129
130	bfree(B_L, ip);
131	}
132
133	/******************************************************************************/
134	/*
135	* Save the input state
136	*/
137
138	void ejLexSaveInputState(ej_t* ep, ejinput_t* state)
139	{
140	ejinput_t *ip;
141
142	a_assert(ep);
143
144	ip = ep->input;
145	a_assert(ip);
146
147	state = ip;
148	if (ip->putBackToken) {
149	state->putBackToken = bstrdup(B_L, ip->putBackToken);
150	}
151	}
152
153	/******************************************************************************/
154	/*
155	* Restore the input state
156	*/
157
158	void ejLexRestoreInputState(ej_t* ep, ejinput_t* state)
159	{
160	ejinput_t *ip;
161
162	a_assert(ep);
163
164	ip = ep->input;
165	a_assert(ip);
166
167	ip->tokbuf = state->tokbuf;
168	ip->script = state->script;
169	ip->putBackTokenId = state->putBackTokenId;
170	if (ip->putBackToken) {
171	bfree(B_L, ip->putBackToken);
172	}
173	if (state->putBackToken) {
174	ip->putBackToken = bstrdup(B_L, state->putBackToken);
175	}
176	}
177
178	/******************************************************************************/
179	/*
180	* Free a saved input state
181	*/
182
183	void ejLexFreeInputState(ej_t* ep, ejinput_t* state)
184	{
185	if (state->putBackToken) {
186	bfree(B_L, state->putBackToken);
187	state->putBackToken = NULL;
188	}
189	}
190
191	/******************************************************************************/
192	/*
193	* Get the next Ejscript token
194	*/
195
196	int ejLexGetToken(ej_t* ep, int state)
197	{
198	ep->tid = getLexicalToken(ep, state);
199	/*
200	* commented out 04 Apr 02 Bg Porter -- we found a case where very long
201	* arguments to write() were being corrupted downstream in the trace call
202	* (the ep->token pointer was being overwritten with the trace message.
203	* restore this if it's useful for your debugging.
204	trace(9, T("ejGetToken: %d, \"%s\"\n"), ep->tid, ep->token);
205	*/
206	return ep->tid;
207	}
208
209	/******************************************************************************/
210	/*
211	* Get the next Ejscript token
212	*/
213
214	static int getLexicalToken(ej_t* ep, int state)
215	{
216	ringq_t inq, tokq;
217	ejinput_t* ip;
218	int done, tid, c, quote, style;
219
220	a_assert(ep);
221	ip = ep->input;
222	a_assert(ip);
223
224	inq = &ip->script;
225	tokq = &ip->tokbuf;
226
227	ep->tid = -1;
228	tid = -1;
229	ep->token = T("");
230
231	ringqFlush(tokq);
232
233	if (ip->putBackTokenId > 0) {
234	ringqPutStr(tokq, ip->putBackToken);
235	tid = ip->putBackTokenId;
236	ip->putBackTokenId = 0;
237	ep->token = (char_t*) tokq->servp;
238	return tid;
239	}
240
241	if ((c = inputGetc(ep)) < 0) {
242	return TOK_EOF;
243	}
244
245	for (done = 0; !done; ) {
246	switch (c) {
247	case -1:
248	return TOK_EOF;
249
250	case ' ':
251	case '\t':
252	case '\r':
253	do {
254	if ((c = inputGetc(ep)) < 0)
255	break;
256	} while (c == ' ' \|\| c == '\t' \|\| c == '\r');
257	break;
258
259	case '\n':
260	return TOK_NEWLINE;
261
262	case '(':
263	tokenAddChar(ep, c);
264	return TOK_LPAREN;
265
266	case ')':
267	tokenAddChar(ep, c);
268	return TOK_RPAREN;
269
270	case '{':
271	tokenAddChar(ep, c);
272	return TOK_LBRACE;
273
274	case '}':
275	tokenAddChar(ep, c);
276	return TOK_RBRACE;
277
278	case '+':
279	if ((c = inputGetc(ep)) < 0) {
280	ejError(ep, T("Syntax Error"));
281	return TOK_ERR;
282	}
283	if (c != '+' ) {
284	inputPutback(ep, c);
285	tokenAddChar(ep, EXPR_PLUS);
286	return TOK_EXPR;
287	}
288	tokenAddChar(ep, EXPR_INC);
289	return TOK_INC_DEC;
290
291	case '-':
292	if ((c = inputGetc(ep)) < 0) {
293	ejError(ep, T("Syntax Error"));
294	return TOK_ERR;
295	}
296	if (c != '-' ) {
297	inputPutback(ep, c);
298	tokenAddChar(ep, EXPR_MINUS);
299	return TOK_EXPR;
300	}
301	tokenAddChar(ep, EXPR_DEC);
302	return TOK_INC_DEC;
303
304	case '*':
305	tokenAddChar(ep, EXPR_MUL);
306	return TOK_EXPR;
307
308	case '%':
309	tokenAddChar(ep, EXPR_MOD);
310	return TOK_EXPR;
311
312	case '/':
313	/*
314	* Handle the division operator and comments
315	*/
316	if ((c = inputGetc(ep)) < 0) {
317	ejError(ep, T("Syntax Error"));
318	return TOK_ERR;
319	}
320	if (c != '*' && c != '/') {
321	inputPutback(ep, c);
322	tokenAddChar(ep, EXPR_DIV);
323	return TOK_EXPR;
324	}
325	style = c;
326	/*
327	* Eat comments. Both C and C++ comment styles are supported.
328	*/
329	while (1) {
330	if ((c = inputGetc(ep)) < 0) {
331	ejError(ep, T("Syntax Error"));
332	return TOK_ERR;
333	}
334	if (c == '\n' && style == '/') {
335	break;
336	} else if (c == '*') {
337	c = inputGetc(ep);
338	if (style == '/') {
339	if (c == '\n') {
340	break;
341	}
342	} else {
343	if (c == '/') {
344	break;
345	}
346	}
347	}
348	}
349	/*
350	* Continue looking for a token, so get the next character
351	*/
352	if ((c = inputGetc(ep)) < 0) {
353	return TOK_EOF;
354	}
355	break;
356
357	case '<': /* < and <= */
358	if ((c = inputGetc(ep)) < 0) {
359	ejError(ep, T("Syntax Error"));
360	return TOK_ERR;
361	}
362	if (c == '<') {
363	tokenAddChar(ep, EXPR_LSHIFT);
364	return TOK_EXPR;
365	} else if (c == '=') {
366	tokenAddChar(ep, EXPR_LESSEQ);
367	return TOK_EXPR;
368	}
369	tokenAddChar(ep, EXPR_LESS);
370	inputPutback(ep, c);
371	return TOK_EXPR;
372
373	case '>': /* > and >= */
374	if ((c = inputGetc(ep)) < 0) {
375	ejError(ep, T("Syntax Error"));
376	return TOK_ERR;
377	}
378	if (c == '>') {
379	tokenAddChar(ep, EXPR_RSHIFT);
380	return TOK_EXPR;
381	} else if (c == '=') {
382	tokenAddChar(ep, EXPR_GREATEREQ);
383	return TOK_EXPR;
384	}
385	tokenAddChar(ep, EXPR_GREATER);
386	inputPutback(ep, c);
387	return TOK_EXPR;
388
389	case '=': /* "==" */
390	if ((c = inputGetc(ep)) < 0) {
391	ejError(ep, T("Syntax Error"));
392	return TOK_ERR;
393	}
394	if (c == '=') {
395	tokenAddChar(ep, EXPR_EQ);
396	return TOK_EXPR;
397	}
398	inputPutback(ep, c);
399	return TOK_ASSIGNMENT;
400
401	case '!': /* "!=" or "!"*/
402	if ((c = inputGetc(ep)) < 0) {
403	ejError(ep, T("Syntax Error"));
404	return TOK_ERR;
405	}
406	if (c == '=') {
407	tokenAddChar(ep, EXPR_NOTEQ);
408	return TOK_EXPR;
409	}
410	inputPutback(ep, c);
411	tokenAddChar(ep, EXPR_BOOL_COMP);
412	return TOK_EXPR;
413
414	case ';':
415	tokenAddChar(ep, c);
416	return TOK_SEMI;
417
418	case ',':
419	tokenAddChar(ep, c);
420	return TOK_COMMA;
421
422	case '\|': /* "\|\|" */
423	if ((c = inputGetc(ep)) < 0 \|\| c != '\|') {
424	ejError(ep, T("Syntax Error"));
425	return TOK_ERR;
426	}
427	tokenAddChar(ep, COND_OR);
428	return TOK_LOGICAL;
429
430	case '&': /* "&&" */
431	if ((c = inputGetc(ep)) < 0 \|\| c != '&') {
432	ejError(ep, T("Syntax Error"));
433	return TOK_ERR;
434	}
435	tokenAddChar(ep, COND_AND);
436	return TOK_LOGICAL;
437
438	case '\"': /* String quote */
439	case '\'':
440	quote = c;
441	if ((c = inputGetc(ep)) < 0) {
442	ejError(ep, T("Syntax Error"));
443	return TOK_ERR;
444	}
445
446	while (c != quote) {
447	/*
448	* check for escape sequence characters
449	*/
450	if (c == '\\') {
451	c = inputGetc(ep);
452
453	if (gisdigit(c)) {
454	/*
455	* octal support, \101 maps to 65 = 'A'. put first char
456	* back so converter will work properly.
457	*/
458	inputPutback(ep, c);
459	c = charConvert(ep, OCTAL, 3);
460
461	} else {
462	switch (c) {
463	case 'n':
464	c = '\n'; break;
465	case 'b':
466	c = '\b'; break;
467	case 'f':
468	c = '\f'; break;
469	case 'r':
470	c = '\r'; break;
471	case 't':
472	c = '\t'; break;
473	case 'x':
474	/*
475	* hex support, \x41 maps to 65 = 'A'
476	*/
477	c = charConvert(ep, HEX, 2);
478	break;
479	case 'u':
480	/*
481	* unicode support, \x0401 maps to 65 = 'A'
482	*/
483	c = charConvert(ep, HEX, 2);
484	c = c*16 + charConvert(ep, HEX, 2);
485
486	break;
487	case '\'':
488	case '\"':
489	case '\\':
490	break;
491	default:
492	ejError(ep, T("Invalid Escape Sequence"));
493	return TOK_ERR;
494	}
495	}
496	if (tokenAddChar(ep, c) < 0) {
497	return TOK_ERR;
498	}
499	} else {
500	if (tokenAddChar(ep, c) < 0) {
501	return TOK_ERR;
502	}
503	}
504	if ((c = inputGetc(ep)) < 0) {
505	ejError(ep, T("Unmatched Quote"));
506	return TOK_ERR;
507	}
508	}
509	return TOK_LITERAL;
510
511	case '0': case '1': case '2': case '3': case '4':
512	case '5': case '6': case '7': case '8': case '9':
513	do {
514	if (tokenAddChar(ep, c) < 0) {
515	return TOK_ERR;
516	}
517	if ((c = inputGetc(ep)) < 0)
518	break;
519	} while (gisdigit(c));
520	inputPutback(ep, c);
521	return TOK_LITERAL;
522
523	default:
524	/*
525	* Identifiers or a function names
526	*/
527	while (1) {
528	if (c == '\\') {
529	/*
530	* just ignore any \ characters.
531	*/
532	} else if (tokenAddChar(ep, c) < 0) {
533	break;
534	}
535	if ((c = inputGetc(ep)) < 0) {
536	break;
537	}
538	if (!gisalnum(c) && c != '$' && c != '_' &&
539	c != '\\') {
540	break;
541	}
542	}
543	if (! gisalpha(tokq->servp) && tokq->servp != '$' &&
544	*tokq->servp != '_') {
545	ejError(ep, T("Invalid identifier %s"), tokq->servp);
546	return TOK_ERR;
547	}
548	/*
549	* Check for reserved words (only "if", "else", "var", "for"
550	* and "return" at the moment)
551	*/
552	if (state == STATE_STMT) {
553	if (gstrcmp(ep->token, T("if")) == 0) {
554	return TOK_IF;
555	} else if (gstrcmp(ep->token, T("else")) == 0) {
556	return TOK_ELSE;
557	} else if (gstrcmp(ep->token, T("var")) == 0) {
558	return TOK_VAR;
559	} else if (gstrcmp(ep->token, T("for")) == 0) {
560	return TOK_FOR;
561	} else if (gstrcmp(ep->token, T("return")) == 0) {
562	if ((c == ';') \|\| (c == '(')) {
563	inputPutback(ep, c);
564	}
565	return TOK_RETURN;
566	}
567	}
568
569	/*
570	* Skip white space after token to find out whether this is
571	* a function or not.
572	*/
573	while (c == ' ' \|\| c == '\t' \|\| c == '\r' \|\| c == '\n') {
574	if ((c = inputGetc(ep)) < 0)
575	break;
576	}
577
578	tid = (c == '(') ? TOK_FUNCTION : TOK_ID;
579	done++;
580	}
581	}
582
583	/*
584	* Putback the last extra character for next time
585	*/
586	inputPutback(ep, c);
587	return tid;
588	}
589
590	/******************************************************************************/
591	/*
592	* Putback the last token read
593	*/
594
595	void ejLexPutbackToken(ej_t* ep, int tid, char_t *string)
596	{
597	ejinput_t* ip;
598
599	a_assert(ep);
600	ip = ep->input;
601	a_assert(ip);
602
603	if (ip->putBackToken) {
604	bfree(B_L, ip->putBackToken);
605	}
606	ip->putBackTokenId = tid;
607	ip->putBackToken = bstrdup(B_L, string);
608	}
609
610	/******************************************************************************/
611	/*
612	* Add a character to the token ringq buffer
613	*/
614
615	static int tokenAddChar(ej_t *ep, int c)
616	{
617	ejinput_t* ip;
618
619	a_assert(ep);
620	ip = ep->input;
621	a_assert(ip);
622
623	if (ringqPutc(&ip->tokbuf, (char_t) c) < 0) {
624	ejError(ep, T("Token too big"));
625	return -1;
626	}
627	* ((char_t*) ip->tokbuf.endp) = '\0';
628	ep->token = (char_t*) ip->tokbuf.servp;
629
630	return 0;
631	}
632
633	/******************************************************************************/
634	/*
635	* Get another input character
636	*/
637
638	static int inputGetc(ej_t* ep)
639	{
640	ejinput_t *ip;
641	int c, len;
642
643	a_assert(ep);
644	ip = ep->input;
645
646	if ((len = ringqLen(&ip->script)) == 0) {
647	return -1;
648	}
649
650	c = ringqGetc(&ip->script);
651
652	if (c == '\n') {
653	ip->lineNumber++;
654	ip->lineColumn = 0;
655	} else {
656	if ((ip->lineColumn + 2) >= ip->lineLength) {
657	ip->lineLength += EJ_INC;
658	ip->line = brealloc(B_L, ip->line, ip->lineLength * sizeof(char_t));
659	}
660	ip->line[ip->lineColumn++] = c;
661	ip->line[ip->lineColumn] = '\0';
662	}
663	return c;
664	}
665
666	/******************************************************************************/
667	/*
668	* Putback a character onto the input queue
669	*/
670
671	static void inputPutback(ej_t* ep, int c)
672	{
673	ejinput_t *ip;
674
675	a_assert(ep);
676
677	ip = ep->input;
678	ringqInsertc(&ip->script, (char_t) c);
679	ip->lineColumn--;
680	ip->line[ip->lineColumn] = '\0';
681	}
682
683	/******************************************************************************/
684	/*
685	* Convert a hex or octal character back to binary, return original char if
686	* not a hex digit
687	*/
688
689	static int charConvert(ej_t* ep, int base, int maxDig)
690	{
691	int i, c, lval, convChar;
692
693	lval = 0;
694	for (i = 0; i < maxDig; i++) {
695	if ((c = inputGetc(ep)) < 0) {
696	break;
697	}
698	/*
699	* Initialize to out of range value
700	*/
701	convChar = base;
702	if (gisdigit(c)) {
703	convChar = c - '0';
704	} else if (c >= 'a' && c <= 'f') {
705	convChar = c - 'a' + 10;
706	} else if (c >= 'A' && c <= 'F') {
707	convChar = c - 'A' + 10;
708	}
709	/*
710	* if unexpected character then return it to buffer.
711	*/
712	if (convChar >= base) {
713	inputPutback(ep, c);
714	break;
715	}
716	lval = (lval * base) + convChar;
717	}
718	return lval;
719	}
720
721	/******************************************************************************/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: