diff --git a/src/ol/expression/lexer.js b/src/ol/expression/lexer.js
index db47971464..e19111dfcc 100644
--- a/src/ol/expression/lexer.js
+++ b/src/ol/expression/lexer.js
@@ -9,6 +9,7 @@ goog.require('goog.asserts');
  */
 ol.expression.Char = {
   AMPERSAND: 38,
+  BACKSLASH: 92,
   BANG: 33, // !
   CARRIAGE_RETURN: 13,
   COMMA: 44,
@@ -28,6 +29,7 @@ ol.expression.Char = {
   LOWER_A: 97,
   LOWER_E: 101,
   LOWER_F: 102,
+  LOWER_U: 117,
   LOWER_X: 120,
   LOWER_Z: 122,
   MINUS: 45,
@@ -249,6 +251,47 @@ ol.expression.Lexer.prototype.isIdentifierStart_ = function(code) {
 };
 
 
+/**
+ * Determine if the given identifier is an ECMAScript keyword. These cannot
+ * be used as identifiers in programs. There is no real reason these could not
+ * be used in ol expressions - so it might be worth allowing them.
+ *
+ * http://www.ecma-international.org/ecma-262/5.1/#sec-7.6.1.1
+ * @param {string} id Identifier.
+ * @return {boolean} The identifier is a keyword.
+ * @private
+ */
+ol.expression.Lexer.prototype.isKeyword_ = function(id) {
+  return (
+      id === 'break' ||
+      id === 'case' ||
+      id === 'catch' ||
+      id === 'continue' ||
+      id === 'debugger' ||
+      id === 'default' ||
+      id === 'delete' ||
+      id === 'do' ||
+      id === 'else' ||
+      id === 'finally' ||
+      id === 'for' ||
+      id === 'function' ||
+      id === 'if' ||
+      id === 'in' ||
+      id === 'instanceof' ||
+      id === 'new' ||
+      id === 'return' ||
+      id === 'switch' ||
+      id === 'this' ||
+      id === 'throw' ||
+      id === 'try' ||
+      id === 'typeof' ||
+      id === 'var' ||
+      id === 'void' ||
+      id === 'while' ||
+      id === 'with');
+};
+
+
 /**
  * http://www.ecma-international.org/ecma-262/5.1/#sec-7.3
  *
@@ -331,6 +374,141 @@ ol.expression.Lexer.prototype.getCurrentCharCode_ = function() {
 };
 
 
+/**
+ * Get an identifier that includes escape sequences. Only unicode escape
+ * sequences (e.g. \u0123) are accepted. An escaped first character must
+ * decode to a valid identifier start; escaped characters after the first must
+ * decode to a valid identifier part.
+ *
+ * http://www.ecma-international.org/ecma-262/5.1/#sec-7.6
+ *
+ * @return {string} The identifier, with escape sequences decoded.
+ * @throws {Error} If a backslash is not followed by a unicode escape sequence
+ *     that decodes to an allowed identifier character.
+ * @private
+ */
+ol.expression.Lexer.prototype.getEscapedIdentifier_ = function() {
+  var code = this.getCurrentCharCode_();
+  var id = String.fromCharCode(code);
+
+  this.increment_(1);
+
+  // the \u sequence denotes an escaped character
+  if (code === ol.expression.Char.BACKSLASH) {
+    if (this.getCurrentCharCode_() !== ol.expression.Char.LOWER_U) {
+      throw new Error('Unexpected token at index ' + this.index_ +
+          ': ' + this.getCurrentChar_());
+    }
+    this.increment_(1);
+    code = this.scanEscapeSequence_(ol.expression.Char.LOWER_U);
+
+    if (!code || code === ol.expression.Char.BACKSLASH ||
+        !this.isIdentifierStart_(code)) {
+      throw new Error('Unexpected token at index ' + this.index_ +
+          ': ' + this.getCurrentChar_());
+    }
+    id = String.fromCharCode(code);
+  }
+
+  while (this.index_ < this.length_) {
+    code = this.getCurrentCharCode_();
+    if (!this.isIdentifierPart_(code)) {
+      break;
+    }
+    this.increment_(1);
+    id += String.fromCharCode(code);
+
+    // the \u sequence denotes an escaped character
+    if (code === ol.expression.Char.BACKSLASH) {
+      if (this.getCurrentCharCode_() !== ol.expression.Char.LOWER_U) {
+        throw new Error('Unexpected token at index ' + this.index_ +
+            ': ' + this.getCurrentChar_());
+      }
+      // replace the backslash that was just appended with the decoded char
+      id = id.slice(0, -1);
+      this.increment_(1);
+      code = this.scanEscapeSequence_(ol.expression.Char.LOWER_U);
+
+      // escapes after the first character only need to be identifier parts
+      if (!code || code === ol.expression.Char.BACKSLASH ||
+          !this.isIdentifierPart_(code)) {
+        throw new Error('Unexpected token at index ' + this.index_ +
+            ': ' + this.getCurrentChar_());
+      }
+      id += String.fromCharCode(code);
+    }
+  }
+
+  return id;
+};
+
+
+/**
+ * Get an identifier. This assumes we've encountered an identifier that doesn't
+ * start with an escape sequence. If an escape sequence is encountered during
+ * the scan, we switch to the `getEscapedIdentifier_` method.
+ *
+ * @return {string} The identifier.
+ * @private
+ */
+ol.expression.Lexer.prototype.getIdentifier_ = function() {
+  goog.asserts.assert(
+      this.getCurrentCharCode_() !== ol.expression.Char.BACKSLASH,
+      'Must not be called with first char a backslash');
+
+  var start = this.index_;
+  this.increment_(1);
+
+  var code;
+  while (this.index_ < this.length_) {
+    code = this.getCurrentCharCode_();
+    if (code === ol.expression.Char.BACKSLASH) {
+      // reset cursor and start over scanning escaped identifier
+      this.index_ = start;
+      return this.getEscapedIdentifier_();
+    }
+    if (this.isIdentifierPart_(code)) {
+      this.increment_(1);
+    } else {
+      break;
+    }
+  }
+  return this.source_.slice(start, this.index_);
+};
+
+
+/**
+ * Scan an escape sequence of characters prefixed by the given character
+ * code. This works for both unicode escape sequences (e.g. \u0123) and
+ * hex escape sequences (e.g. \x12). The prefix itself is not read here:
+ * scanning starts at the current character, taking four hex digits for a `u`
+ * prefix and two for any other prefix.
+ * http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.4
+ *
+ * @param {number} prefix The character code of the escape prefix.
+ * @return {number} The unicode of the string resulting from the escape
+ *     sequence. For invalid escape sequences, 0 is returned (so a sequence
+ *     that decodes to 0 cannot be told apart from an invalid one).
+ * @private
+ */
+ol.expression.Lexer.prototype.scanEscapeSequence_ = function(prefix) {
+  var code = 0;
+  var len = (prefix === ol.expression.Char.LOWER_U) ? 4 : 2;
+  var ch;
+  for (var i = 0; i < len; ++i) {
+    if (this.index_ < this.length_ &&
+        this.isHexDigit_(this.getCurrentCharCode_())) {
+      ch = this.getCurrentChar_();
+      code = (code * 16) + parseInt(ch, 16);
+      this.increment_(1);
+    } else {
+      return 0;
+    }
+  }
+  return code;
+};
+
+
 /**
  * Scan hex literal as numeric token.
  *
@@ -376,7 +554,31 @@ ol.expression.Lexer.prototype.scanHexLiteral_ = function() {
  * @private
  */
 ol.expression.Lexer.prototype.scanIdentifier_ = function() {
-  throw new Error('Not yet implemented');
+  var code = this.getCurrentCharCode_();
+  goog.asserts.assert(this.isIdentifierStart_(code),
+      'Must be called with a valid identifier');
+
+  var id = (code === ol.expression.Char.BACKSLASH) ?
+      this.getEscapedIdentifier_() : this.getIdentifier_();
+
+  var type;
+  // no keyword or literal name is a single character, so skip the lookups
+  if (id.length === 1) {
+    type = ol.expression.TokenType.IDENTIFIER;
+  } else if (this.isKeyword_(id)) {
+    type = ol.expression.TokenType.KEYWORD;
+  } else if (id === 'null') {
+    type = ol.expression.TokenType.NULL_LITERAL;
+  } else if (id === 'true' || id === 'false') {
+    type = ol.expression.TokenType.BOOLEAN_LITERAL;
+  } else {
+    type = ol.expression.TokenType.IDENTIFIER;
+  }
+
+  return {
+    type: type,
+    value: id
+  };
 };
 
 
diff --git a/test/spec/ol/expression/lexer.test.js b/test/spec/ol/expression/lexer.test.js
index 89f37614d5..0bb094a281 100644
--- a/test/spec/ol/expression/lexer.test.js
+++ b/test/spec/ol/expression/lexer.test.js
@@ -9,10 +9,101 @@ describe('ol.expression.Lexer', function() {
     });
   });
 
+  describe('#scanIdentifier_()', function() {
+
+    function scan(source) {
+      var lexer = new ol.expression.Lexer(source);
+      return lexer.scanIdentifier_();
+    }
+
+    it('works for short identifiers', function() {
+      var token = scan('a');
+      expect(token.value).to.be('a');
+      expect(token.type).to.be(ol.expression.TokenType.IDENTIFIER);
+    });
+
+    it('works for longer identifiers', function() {
+      var token = scan('foo');
+      expect(token.value).to.be('foo');
+      expect(token.type).to.be(ol.expression.TokenType.IDENTIFIER);
+    });
+
+    it('works for $ anywhere', function() {
+      var token = scan('$foo$bar$');
+      expect(token.value).to.be('$foo$bar$');
+      expect(token.type).to.be(ol.expression.TokenType.IDENTIFIER);
+    });
+
+    it('works for _ anywhere', function() {
+      var token = scan('_foo_bar_');
+      expect(token.value).to.be('_foo_bar_');
+      expect(token.type).to.be(ol.expression.TokenType.IDENTIFIER);
+    });
+
+    it('works for keywords', function() {
+      var token = scan('delete');
+      expect(token.value).to.be('delete');
+      expect(token.type).to.be(ol.expression.TokenType.KEYWORD);
+    });
+
+    it('works for null', function() {
+      var token = scan('null');
+      expect(token.value).to.be('null');
+      expect(token.type).to.be(ol.expression.TokenType.NULL_LITERAL);
+    });
+
+    it('works for boolean true', function() {
+      var token = scan('true');
+      expect(token.value).to.be('true');
+      expect(token.type).to.be(ol.expression.TokenType.BOOLEAN_LITERAL);
+    });
+
+    it('works for boolean false', function() {
+      var token = scan('false');
+      expect(token.value).to.be('false');
+      expect(token.type).to.be(ol.expression.TokenType.BOOLEAN_LITERAL);
+    });
+
+    it('works with unicode escape sequences', function() {
+      // NOTE: JavaScript decodes the escapes in this string literal, so the
+      // source handed to the lexer is the plain text 'ol3'
+      var token = scan('\u006f\u006c\u0033');
+      expect(token.value).to.be('ol3');
+      expect(token.type).to.be(ol.expression.TokenType.IDENTIFIER);
+    });
+
+    it('works with hex escape sequences', function() {
+      // NOTE: JavaScript decodes the escapes in this string literal, so the
+      // source handed to the lexer is the plain text 'ol3'
+      var token = scan('\x6f\x6c\x33');
+      expect(token.value).to.be('ol3');
+      expect(token.type).to.be(ol.expression.TokenType.IDENTIFIER);
+    });
+
+    it('throws for identifiers starting with a number', function() {
+      expect(function() {
+        scan('4foo');
+      }).throwException();
+    });
+
+    it('throws for identifiers starting with a punctuation char', function() {
+      expect(function() {
+        scan('!foo');
+      }).throwException();
+    });
+
+    it('only scans valid identifier part', function() {
+      var token = scan('foo>bar');
+      expect(token.value).to.be('foo');
+      expect(token.type).to.be(ol.expression.TokenType.IDENTIFIER);
+    });
+
+  });
+
   describe('#scanNumericLiteral_()', function() {
 
-    function scan(code) {
-      var lexer = new ol.expression.Lexer(code);
+    function scan(source) {
+      var lexer = new ol.expression.Lexer(source);
       return lexer.scanNumericLiteral_();
     }
 