35 #include "interpreter.h"
38 #include "identifier.h"
46 static Lexer *currLexer = 0;
52 #include "lexer.lut.h"
54 extern YYLTYPE yylloc;
59 return Lexer::curr()->lex();
64 size8(128), size16(128), restrKeyword(false),
65 convertNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
70 current(0), next1(0), next2(0), next3(0),
71 strings(0), numStrings(0), stringsCapacity(0),
72 identifiers(0), numIdentifiers(0), identifiersCapacity(0)
75 buffer8 =
new char[size8];
76 buffer16 =
new UChar[size16];
90 currLexer =
new Lexer();
96 void Lexer::globalClear()
103 void Lexer::setCode(
const UChar *c,
unsigned int len)
106 restrKeyword =
false;
108 convertNextIdentifier =
false;
117 #ifndef KJS_PURE_ECMA
122 current = (length > 0) ? code[0].uc : -1;
123 next1 = (length > 1) ? code[1].uc : -1;
124 next2 = (length > 2) ? code[2].uc : -1;
125 next3 = (length > 3) ? code[3].uc : -1;
128 void Lexer::shift(
unsigned int p)
135 next3 = (pos + 3 < length) ? code[pos+3].uc : -1;
140 void Lexer::nextLine()
143 #ifndef KJS_PURE_ECMA
148 void Lexer::setDone(State s)
158 unsigned short stringType = 0;
167 if (stackToken >= 0) {
174 if (skipLF && current !=
'\n')
176 if (skipCR && current !=
'\r')
178 if (skipLF || skipCR)
185 bool cr = (current ==
'\r');
186 bool lf = (current ==
'\n');
191 bool isLineTerminator = cr || lf;
195 if (isWhiteSpace(current)) {
197 }
else if (current ==
'/' && next1 ==
'/') {
199 state = InSingleLineComment;
200 }
else if (current ==
'/' && next1 ==
'*') {
202 state = InMultiLineComment;
203 }
else if (current == -1) {
204 if (!terminator && !delimited) {
211 }
else if (isLineTerminator) {
218 }
else if (current ==
'"' || current ==
'\'') {
220 stringType = current;
221 }
else if (isIdentLetter(current)) {
223 state = InIdentifierOrKeyword;
224 }
else if (current ==
'\\') {
225 state = InIdentifierUnicodeEscapeStart;
226 }
else if (current ==
'0') {
229 }
else if (isDecimalDigit(current)) {
232 }
else if (current ==
'.' && isDecimalDigit(next1)) {
235 #ifndef KJS_PURE_ECMA
237 }
else if (current ==
'<' && next1 ==
'!' &&
238 next2 ==
'-' && next3 ==
'-') {
240 state = InSingleLineComment;
242 }
else if (bol && current ==
'-' && next1 ==
'-' && next2 ==
'>') {
244 state = InSingleLineComment;
247 token = matchPunctuator(current, next1, next2, next3);
257 if (current == stringType) {
260 }
else if (current == -1 || isLineTerminator) {
262 }
else if (current ==
'\\') {
263 state = InEscapeSequence;
269 case InEscapeSequence:
270 if (isOctalDigit(current)) {
271 if (current >=
'0' && current <=
'3' &&
272 isOctalDigit(next1) && isOctalDigit(next2)) {
273 record16(convertOctal(current, next1, next2));
276 }
else if (isOctalDigit(current) && isOctalDigit(next1)) {
277 record16(convertOctal(
'0', current, next1));
280 }
else if (isOctalDigit(current)) {
281 record16(convertOctal(
'0',
'0', current));
286 }
else if (current ==
'x')
288 else if (current ==
'u')
289 state = InUnicodeEscape;
291 if (isLineTerminator)
293 record16(singleEscape(current));
298 if (isHexDigit(current) && isHexDigit(next1)) {
300 record16(convertHex(current, next1));
302 }
else if (current == stringType) {
312 case InUnicodeEscape:
313 if (isHexDigit(current) && isHexDigit(next1) &&
314 isHexDigit(next2) && isHexDigit(next3)) {
315 record16(convertUnicode(current, next1, next2, next3));
318 }
else if (current == stringType) {
326 case InSingleLineComment:
327 if (isLineTerminator) {
335 }
else if (current == -1) {
339 case InMultiLineComment:
342 }
else if (isLineTerminator) {
344 }
else if (current ==
'*' && next1 ==
'/') {
349 case InIdentifierOrKeyword:
351 if (isIdentLetter(current) || isDecimalDigit(current))
353 else if (current ==
'\\')
354 state = InIdentifierUnicodeEscapeStart;
356 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword :
Identifier);
359 if (current ==
'x' || current ==
'X') {
362 }
else if (current ==
'.') {
365 }
else if (current ==
'e' || current ==
'E') {
367 state = InExponentIndicator;
368 }
else if (isOctalDigit(current)) {
371 }
else if (isDecimalDigit(current)) {
379 if (isHexDigit(current)) {
386 if (isOctalDigit(current)) {
389 else if (isDecimalDigit(current)) {
396 if (isDecimalDigit(current)) {
398 }
else if (current ==
'.') {
401 }
else if (current ==
'e' || current ==
'E') {
403 state = InExponentIndicator;
408 if (isDecimalDigit(current)) {
410 }
else if (current ==
'e' || current ==
'E') {
412 state = InExponentIndicator;
416 case InExponentIndicator:
417 if (current ==
'+' || current ==
'-') {
419 }
else if (isDecimalDigit(current)) {
426 if (isDecimalDigit(current)) {
431 case InIdentifierUnicodeEscapeStart:
433 state = InIdentifierUnicodeEscape;
437 case InIdentifierUnicodeEscape:
438 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
439 record16(convertUnicode(current, next1, next2, next3));
441 state = InIdentifier;
447 assert(!
"Unhandled state in switch statement");
453 #ifndef KJS_PURE_ECMA
454 if (state != Start && state != InSingleLineComment)
460 if ((state ==
Number || state == Octal || state == Hex)
461 && isIdentLetter(current))
465 buffer8[pos8] =
'\0';
468 fprintf(stderr,
"line: %d ", lineNo());
469 fprintf(stderr,
"yytext (%x): ", buffer8[0]);
470 fprintf(stderr,
"%s ", buffer8);
473 long double dval = 0;
475 dval = kjs_strtod(buffer8, 0L);
476 }
else if (state == Hex) {
478 if (buffer8[0] ==
'0' && (buffer8[1] ==
'x' || buffer8[1] ==
'X')) {
479 for (
const char *p = buffer8+2; *p; p++) {
480 if (!isHexDigit(*p)) {
484 dval = dval * 16 + convertHex(*p);
488 }
else if (state == Octal) {
490 if (buffer8[0] ==
'0') {
491 for (
const char *p = buffer8+1; *p; p++) {
492 if (*p < '0' || *p >
'7') {
496 dval = dval * 8 + *p -
'0';
511 case IdentifierOrKeyword:
512 printf(
"(Identifier)/(Keyword)\n");
515 printf(
"(String)\n");
518 printf(
"(Number)\n");
525 if (state !=
Identifier && state != IdentifierOrKeyword &&
526 convertNextIdentifier)
527 convertNextIdentifier =
false;
529 restrKeyword =
false;
531 kjsyylloc.first_line = yylineno;
532 kjsyylloc.last_line = yylineno;
539 if(token ==
'}' || token ==
';') {
543 case IdentifierOrKeyword:
544 if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
548 if (convertNextIdentifier) {
549 convertNextIdentifier =
false;
551 UString debugstr(buffer16, pos16); fprintf(stderr,
"Anonymous function hack: eating identifier %s\n",debugstr.ascii());
553 token = FUNCEXPRIDENT;
558 kjsyylval.ident = makeIdentifier(buffer16, pos16);
562 convertNextIdentifier =
false;
566 if ( token == FUNCTION &&
567 ( lastToken ==
'=' || lastToken ==
',' || lastToken ==
'(' ||
568 lastToken ==
':' || lastToken == RETURN ) )
569 convertNextIdentifier =
true;
571 if (token == CONTINUE || token == BREAK ||
572 token == RETURN || token == THROW)
576 kjsyylval.ustr = makeUString(buffer16, pos16);
580 kjsyylval.dval = dval;
587 assert(!
"unhandled numeration value in switch");
594 bool Lexer::isWhiteSpace(
unsigned short c)
596 return (c ==
' ' || c ==
'\t' ||
597 c == 0x0b || c == 0x0c || c == 0xa0);
600 bool Lexer::isIdentLetter(
unsigned short c)
607 return (c >=
'a' && c <=
'z' ||
608 c >=
'A' && c <=
'Z' ||
610 c >= 0x00c0 && c <= 0x00d6 ||
612 c >= 0x00d8 && c <= 0x00f6 ||
614 c >= 0x00f8 && c <= 0x02af ||
616 c >= 0x0388 && c <= 0x1ffc ||
617 c ==
'$' || c ==
'_');
621 bool Lexer::isDecimalDigit(
unsigned short c)
623 return (c >=
'0' && c <=
'9');
626 bool Lexer::isHexDigit(
unsigned short c)
628 return (c >=
'0' && c <=
'9' ||
629 c >=
'a' && c <=
'f' ||
630 c >=
'A' && c <=
'F');
633 bool Lexer::isOctalDigit(
unsigned short c)
635 return (c >=
'0' && c <=
'7');
638 int Lexer::matchPunctuator(
unsigned short c1,
unsigned short c2,
639 unsigned short c3,
unsigned short c4)
641 if (c1 ==
'>' && c2 ==
'>' && c3 ==
'>' && c4 ==
'=') {
644 }
else if (c1 ==
'=' && c2 ==
'=' && c3 ==
'=') {
647 }
else if (c1 ==
'!' && c2 ==
'=' && c3 ==
'=') {
650 }
else if (c1 ==
'>' && c2 ==
'>' && c3 ==
'>') {
653 }
else if (c1 ==
'<' && c2 ==
'<' && c3 ==
'=') {
656 }
else if (c1 ==
'>' && c2 ==
'>' && c3 ==
'=') {
659 }
else if (c1 ==
'<' && c2 ==
'=') {
662 }
else if (c1 ==
'>' && c2 ==
'=') {
665 }
else if (c1 ==
'!' && c2 ==
'=') {
668 }
else if (c1 ==
'+' && c2 ==
'+') {
674 }
else if (c1 ==
'-' && c2 ==
'-') {
677 return AUTOMINUSMINUS;
680 }
else if (c1 ==
'=' && c2 ==
'=') {
683 }
else if (c1 ==
'+' && c2 ==
'=') {
686 }
else if (c1 ==
'-' && c2 ==
'=') {
689 }
else if (c1 ==
'*' && c2 ==
'=') {
692 }
else if (c1 ==
'/' && c2 ==
'=') {
695 }
else if (c1 ==
'&' && c2 ==
'=') {
698 }
else if (c1 ==
'^' && c2 ==
'=') {
701 }
else if (c1 ==
'%' && c2 ==
'=') {
704 }
else if (c1 ==
'|' && c2 ==
'=') {
707 }
else if (c1 ==
'<' && c2 ==
'<') {
710 }
else if (c1 ==
'>' && c2 ==
'>') {
713 }
else if (c1 ==
'&' && c2 ==
'&') {
716 }
else if (c1 ==
'|' && c2 ==
'|') {
747 return static_cast<int>(c1);
753 unsigned short Lexer::singleEscape(
unsigned short c)
const
779 unsigned short Lexer::convertOctal(
unsigned short c1,
unsigned short c2,
780 unsigned short c3)
const
782 return ((c1 -
'0') * 64 + (c2 -
'0') * 8 + c3 -
'0');
785 unsigned char Lexer::convertHex(
unsigned short c)
787 if (c >=
'0' && c <=
'9')
789 else if (c >=
'a' && c <=
'f')
790 return (c -
'a' + 10);
792 return (c -
'A' + 10);
795 unsigned char Lexer::convertHex(
unsigned short c1,
unsigned short c2)
797 return ((convertHex(c1) << 4) + convertHex(c2));
800 UChar Lexer::convertUnicode(
unsigned short c1,
unsigned short c2,
801 unsigned short c3,
unsigned short c4)
803 return UChar((convertHex(c1) << 4) + convertHex(c2),
804 (convertHex(c3) << 4) + convertHex(c4));
807 void Lexer::record8(
unsigned short c)
812 if (pos8 >= size8 - 1) {
813 char *tmp =
new char[2 * size8];
814 memcpy(tmp, buffer8, size8 *
sizeof(
char));
820 buffer8[pos8++] = (char) c;
823 void Lexer::record16(
int c)
827 record16(
UChar(
static_cast<unsigned short>(c)));
830 void Lexer::record16(
UChar c)
833 if (pos16 >= size16 - 1) {
835 memcpy(tmp, buffer16, size16 *
sizeof(
UChar));
841 buffer16[pos16++] = c;
844 bool Lexer::scanRegExp()
847 bool lastWasEscape =
false;
848 bool inBrackets =
false;
851 if (current ==
'\r' || current ==
'\n' || current == -1)
853 else if (current !=
'/' || lastWasEscape ==
true || inBrackets ==
true)
856 if ( !lastWasEscape ) {
857 if ( current ==
'[' && !inBrackets )
859 if ( current ==
']' && inBrackets )
864 !lastWasEscape && (current ==
'\\');
867 pattern =
UString(buffer16, pos16);
875 while (isIdentLetter(current)) {
879 flags =
UString(buffer16, pos16);
885 void Lexer::doneParsing()
887 for (
unsigned i = 0; i < numIdentifiers; i++) {
888 delete identifiers[i];
893 identifiersCapacity = 0;
895 for (
unsigned i = 0; i < numStrings; i++) {
904 const int initialCapacity = 64;
905 const int growthFactor = 2;
909 if (numIdentifiers == identifiersCapacity) {
910 identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;
915 identifiers[numIdentifiers++] = identifier;
919 UString *Lexer::makeUString(
UChar *buffer,
unsigned int pos)
921 if (numStrings == stringsCapacity) {
922 stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;
923 strings = (
UString **)realloc(strings,
sizeof(
UString *) * stringsCapacity);
927 strings[numStrings++] = string;
Represents an Identifier for a JavaScript object.
Represents a primitive Number value.
Represents a primitive String value.