diff --git a/src/ol/expression/expression.js b/src/ol/expression/expression.js index 1d8a7e4118..b7606fbe47 100644 --- a/src/ol/expression/expression.js +++ b/src/ol/expression/expression.js @@ -10,33 +10,6 @@ goog.provide('ol.expression'); */ -/** - * @enum {number} - */ -ol.expression.Char = { - CARRIAGE_RETURN: 13, - DIGIT_0: 48, - DIGIT_7: 55, - DIGIT_9: 57, - DOLLAR: 36, - FORM_FEED: 0xC, - LINE_FEED: 10, - LINE_SEPARATOR: 0x2028, - LOWER_A: 97, - LOWER_F: 102, - LOWER_Z: 122, - NONBREAKING_SPACE: 0xA0, - PARAGRAPH_SEPARATOR: 0x2029, - SPACE: 32, - TAB: 9, - UNDERSCORE: 95, - UPPER_A: 65, - UPPER_F: 70, - UPPER_Z: 90, - VERTICAL_TAB: 0xB -}; - - /** * @enum {string} */ @@ -50,126 +23,3 @@ ol.expression.Syntax = { PROPERTY: 'Property', // dot notation only UNARY_EXPRESSION: 'UnaryExpression' // only with logical not }; - - -/** - * @enum {string} - */ -ol.expression.TokenType = { - BOOLEAN_LITERAL: 'Boolean', - EOF: '', - IDENTIFIER: 'Identifier', - KEYWORD: 'Keyword', - NULL_LITERAL: 'Null', - NUMERIC_LITERAL: 'Numeric', - PUNCTUATOR: 'Punctuator', - STRING_LITERAL: 'String' -}; - - -/** - * http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.3 - * @param {number} ch The unicode of a character. - * @return {boolean} The character is a decimal digit. - */ -ol.expression.isDecimalDigit = function(ch) { - return (ch >= ol.expression.Char.DIGIT_0 && ch <= ol.expression.Char.DIGIT_9); -}; - - -/** - * http://www.ecma-international.org/ecma-262/5.1/#sec-7.6.1.2 - * @param {string} id A string identifier. - * @return {boolean} The identifier is a future reserved word. - */ -ol.expression.isFutureReservedWord = function(id) { - switch (id) { - case 'class': - case 'enum': - case 'export': - case 'extends': - case 'import': - case 'super': - return true; - default: - return false; - } -}; - - -/** - * http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.3 - * @param {number} ch The unicode of a character. - * @return {boolean} The character is a hex digit. 
- */ -ol.expression.isHexDigit = function(ch) { - return ol.expression.isDecimalDigit(ch) || - (ch >= ol.expression.Char.LOWER_A && ch <= ol.expression.Char.LOWER_F) || - (ch >= ol.expression.Char.UPPER_A && ch <= ol.expression.Char.UPPER_F); -}; - - -/** - * http://www.ecma-international.org/ecma-262/5.1/#sec-7.6 - * Doesn't deal with non-ascii identifiers. - * @param {number} ch The unicode of a character. - * @return {boolean} The character is a valid identifier part. - */ -ol.expression.isIdentifierPart = function(ch) { - return ol.expression.isIdentifierStart(ch) || - (ch >= ol.expression.Char.DIGIT_0 && ch <= ol.expression.Char.DIGIT_9); -}; - - -/** - * http://www.ecma-international.org/ecma-262/5.1/#sec-7.6 - * Doesn't yet deal with non-ascii identifiers. - * @param {number} ch The unicode of a character. - * @return {boolean} The character is a valid identifier start. - */ -ol.expression.isIdentifierStart = function(ch) { - return (ch === ol.expression.Char.DOLLAR) || - (ch === ol.expression.Char.UNDERSCORE) || - (ch >= ol.expression.Char.UPPER_A && ch <= ol.expression.Char.UPPER_Z) || - (ch >= ol.expression.Char.LOWER_A && ch <= ol.expression.Char.LOWER_Z); -}; - - -/** - * http://www.ecma-international.org/ecma-262/5.1/#sec-7.3 - * @param {number} ch The unicode of a character. - * @return {boolean} The character is a line terminator. - */ -ol.expression.isLineTerminator = function(ch) { - return (ch === ol.expression.Char.LINE_FEED) || - (ch === ol.expression.Char.CARRIAGE_RETURN) || - (ch === ol.expression.Char.LINE_SEPARATOR) || - (ch === ol.expression.Char.PARAGRAPH_SEPARATOR); -}; - - -/** - * http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.3 - * @param {number} ch The unicode of a character. - * @return {boolean} The character is an octal digit. 
- */ -ol.expression.isOctalDigit = function(ch) { - return (ch >= ol.expression.Char.DIGIT_0 && ch <= ol.expression.Char.DIGIT_7); -}; - - -/** - * http://www.ecma-international.org/ecma-262/5.1/#sec-7.2 - * @param {number} ch The unicode of a character. - * @return {boolean} The character is whitespace. - */ -ol.expression.isWhitespace = function(ch) { - return (ch === ol.expression.Char.SPACE) || - (ch === ol.expression.Char.TAB) || - (ch === ol.expression.Char.VERTICAL_TAB) || - (ch === ol.expression.Char.FORM_FEED) || - (ch === ol.expression.Char.NONBREAKING_SPACE) || - (ch >= 0x1680 && '\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005' + - '\u2006\u2007\u2008\u2009\u200A\u202F\u205F\u3000\uFEFF' - .indexOf(String.fromCharCode(ch)) > 0); -}; diff --git a/src/ol/expression/lexer.js b/src/ol/expression/lexer.js new file mode 100644 index 0000000000..564d580268 --- /dev/null +++ b/src/ol/expression/lexer.js @@ -0,0 +1,426 @@ +goog.provide('ol.expression.Lexer'); + + +/** + * @enum {number} + */ +ol.expression.Char = { + AMPERSAND: 38, + BANG: 33, // ! 
+ CARRIAGE_RETURN: 13, + COMMA: 44, + DIGIT_0: 48, + DIGIT_7: 55, + DIGIT_9: 57, + DOLLAR: 36, + DOUBLE_QUOTE: 34, + DOT: 46, + EQUAL: 61, + FORM_FEED: 0xC, + GREATER: 62, + LEFT_PAREN: 40, + LESS: 60, + LINE_FEED: 10, + LINE_SEPARATOR: 0x2028, + LOWER_A: 97, + LOWER_F: 102, + LOWER_Z: 122, + MINUS: 45, + NONBREAKING_SPACE: 0xA0, + PARAGRAPH_SEPARATOR: 0x2029, + PERCENT: 37, + PIPE: 124, + PLUS: 43, + RIGHT_PAREN: 41, + SINGLE_QUOTE: 39, + SPACE: 32, + STAR: 42, + TAB: 9, + TILDE: 126, + UNDERSCORE: 95, + UPPER_A: 65, + UPPER_F: 70, + UPPER_Z: 90, + VERTICAL_TAB: 0xB +}; + + +/** + * Types of the tokens produced by the lexer. + * @enum {string} + */ +ol.expression.TokenType = { + BOOLEAN_LITERAL: 'Boolean', + EOF: '', + IDENTIFIER: 'Identifier', + KEYWORD: 'Keyword', + NULL_LITERAL: 'Null', + NUMERIC_LITERAL: 'Numeric', + PUNCTUATOR: 'Punctuator', + STRING_LITERAL: 'String' +}; + + +/** + * A scanned token: its type together with its value. + * @typedef {{type: (ol.expression.TokenType), + * value: (string|number|boolean|null)}} + */ +ol.expression.Token; + + + +/** + * Lexer constructor. Stores the source and starts scanning at index 0. + * @constructor + * @param {string} source Source code. + */ +ol.expression.Lexer = function(source) { + + /** + * Source code. + * @type {string} + * @private + */ + this.source_ = source; + + /** + * Source length (cached from source.length). + * @type {number} + * @private + */ + this.length_ = source.length; + + /** + * Current character index into the source. + * @type {number} + * @private + */ + this.index_ = 0; + +}; + + +/** + * Scan next token. + * @return {ol.expression.Token} Next token. 
+ * @private
+ */
+ol.expression.Lexer.prototype.advance_ = function() {
+  // nothing left to scan: report end of input
+  if (this.index_ >= this.length_) {
+    return {
+      type: ol.expression.TokenType.EOF,
+      value: null
+    };
+  }
+  var ch = this.getCurrentCharCode_();
+
+  // check for common punctuation
+  if (ch === ol.expression.Char.LEFT_PAREN ||
+      ch === ol.expression.Char.RIGHT_PAREN) {
+    return this.scanPunctuator_();
+  }
+
+  // check for string literal
+  if (ch === ol.expression.Char.SINGLE_QUOTE ||
+      ch === ol.expression.Char.DOUBLE_QUOTE) {
+    return this.scanStringLiteral_();
+  }
+
+  // check for identifier; the scanned token has to be returned, otherwise it
+  // is dropped and control falls through to the punctuator scan below
+  if (this.isIdentifierStart_(ch)) {
+    return this.scanIdentifier_();
+  }
+
+  // check dot punctuation or decimal
+  if (ch === ol.expression.Char.DOT) {
+    // a dot directly followed by a digit goes to the numeric literal scanner
+    if (this.isDecimalDigit_(this.getCharCode_(1))) {
+      return this.scanNumericLiteral_();
+    }
+    return this.scanPunctuator_();
+  }
+
+  // check decimal number
+  if (this.isDecimalDigit_(ch)) {
+    return this.scanNumericLiteral_();
+  }
+
+  // all the rest is punctuation
+  return this.scanPunctuator_();
+};
+
+
+/**
+ * Increment the current character index.
+ * @param {number} delta Delta by which the index is advanced.
+ * @private
+ */
+ol.expression.Lexer.prototype.increment_ = function(delta) {
+  this.index_ += delta;
+};
+
+
+/**
+ * http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.3
+ * @param {number} ch The unicode of a character.
+ * @return {boolean} The character is a decimal digit.
+ * @private
+ */
+ol.expression.Lexer.prototype.isDecimalDigit_ = function(ch) {
+  return (ch >= ol.expression.Char.DIGIT_0 && ch <= ol.expression.Char.DIGIT_9);
+};
+
+
+/**
+ * http://www.ecma-international.org/ecma-262/5.1/#sec-7.6.1.2
+ * @param {string} id A string identifier.
+ * @return {boolean} The identifier is a future reserved word.
+ * @private
+ */
+ol.expression.Lexer.prototype.isFutureReservedWord_ = function(id) {
+  return (
+      id === 'class' ||
+      id === 'enum' ||
+      id === 'export' ||
+      id === 'extends' ||
+      id === 'import' ||
+      id === 'super');
+};
+
+
+/**
+ * http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.3
+ * @param {number} ch The unicode of a character.
+ * @return {boolean} The character is a hex digit.
+ * @private
+ */
+ol.expression.Lexer.prototype.isHexDigit_ = function(ch) {
+  return this.isDecimalDigit_(ch) ||
+      (ch >= ol.expression.Char.LOWER_A && ch <= ol.expression.Char.LOWER_F) ||
+      (ch >= ol.expression.Char.UPPER_A && ch <= ol.expression.Char.UPPER_F);
+};
+
+
+/**
+ * http://www.ecma-international.org/ecma-262/5.1/#sec-7.6
+ * Doesn't yet deal with non-ascii identifiers.
+ * @param {number} ch The unicode of a character.
+ * @return {boolean} The character is a valid identifier part.
+ * @private
+ */
+ol.expression.Lexer.prototype.isIdentifierPart_ = function(ch) {
+  // an identifier part is an identifier start or a decimal digit; reuse the
+  // digit test (as isHexDigit_ does) instead of repeating the range check
+  return this.isIdentifierStart_(ch) || this.isDecimalDigit_(ch);
+};
+
+
+/**
+ * http://www.ecma-international.org/ecma-262/5.1/#sec-7.6
+ * Doesn't yet deal with non-ascii identifiers.
+ * @param {number} ch The unicode of a character.
+ * @return {boolean} The character is a valid identifier start.
+ * @private
+ */
+ol.expression.Lexer.prototype.isIdentifierStart_ = function(ch) {
+  return (ch === ol.expression.Char.DOLLAR) ||
+      (ch === ol.expression.Char.UNDERSCORE) ||
+      (ch >= ol.expression.Char.UPPER_A && ch <= ol.expression.Char.UPPER_Z) ||
+      (ch >= ol.expression.Char.LOWER_A && ch <= ol.expression.Char.LOWER_Z);
+};
+
+
+/**
+ * http://www.ecma-international.org/ecma-262/5.1/#sec-7.3
+ * @param {number} ch The unicode of a character.
+ * @return {boolean} The character is a line terminator.
+ * @private
+ */
+ol.expression.Lexer.prototype.isLineTerminator_ = function(ch) {
+  return (ch === ol.expression.Char.LINE_FEED) ||
+      (ch === ol.expression.Char.CARRIAGE_RETURN) ||
+      (ch === ol.expression.Char.LINE_SEPARATOR) ||
+      (ch === ol.expression.Char.PARAGRAPH_SEPARATOR);
+};
+
+
+/**
+ * http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.3
+ * @param {number} ch The unicode of a character.
+ * @return {boolean} The character is an octal digit.
+ * @private
+ */
+ol.expression.Lexer.prototype.isOctalDigit_ = function(ch) {
+  return (ch >= ol.expression.Char.DIGIT_0 && ch <= ol.expression.Char.DIGIT_7);
+};
+
+
+/**
+ * http://www.ecma-international.org/ecma-262/5.1/#sec-7.2
+ * @param {number} ch The unicode of a character.
+ * @return {boolean} The character is whitespace.
+ * @private
+ */
+ol.expression.Lexer.prototype.isWhitespace_ = function(ch) {
+  // The two string pieces are parenthesized so that indexOf searches the whole
+  // list (a method call binds tighter than +), and the comparison is >= 0 so
+  // that the first entry (index 0) counts as a match; indexOf returns -1 when
+  // the character is absent.
+  return (ch === ol.expression.Char.SPACE) ||
+      (ch === ol.expression.Char.TAB) ||
+      (ch === ol.expression.Char.VERTICAL_TAB) ||
+      (ch === ol.expression.Char.FORM_FEED) ||
+      (ch === ol.expression.Char.NONBREAKING_SPACE) ||
+      (ch >= 0x1680 && ('\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005' +
+          '\u2006\u2007\u2008\u2009\u200A\u202F\u205F\u3000\uFEFF')
+          .indexOf(String.fromCharCode(ch)) >= 0);
+};
+
+
+/**
+ * Get the unicode of the character at the given offset from the current index.
+ * @param {number} delta Offset from current index.
+ * @return {number} The character code.
+ * @private
+ */
+ol.expression.Lexer.prototype.getCharCode_ = function(delta) {
+  return this.source_.charCodeAt(this.index_ + delta);
+};
+
+
+/**
+ * Get the unicode of the character at the current index.
+ * @return {number} The current character code.
+ * @private
+ */
+ol.expression.Lexer.prototype.getCurrentCharCode_ = function() {
+  return this.getCharCode_(0);
+};
+
+
+/**
+ * Scan punctuator token (a subset of allowed tokens in 7.7).
+ * @return {ol.expression.Token} Punctuator token.
+ * @private
+ * @throws {Error} If the current character does not start an allowed
+ *     punctuator.
+ */
+ol.expression.Lexer.prototype.scanPunctuator_ = function() {
+  var ch = this.getCurrentCharCode_();
+  var ch1 = this.getCharCode_(1);
+
+  // Longer punctuators are matched first so that their first character is
+  // not emitted as a token of its own (e.g. > before >=, | before ||).
+
+  // assignment or comparison (and we don't allow assignment)
+  if (ch1 === ol.expression.Char.EQUAL) {
+    if (ch === ol.expression.Char.BANG || ch === ol.expression.Char.EQUAL) {
+      // we're looking at !=, ==, !==, or ===
+      this.increment_(2);
+
+      // check for triple; the index now points at the third character
+      if (this.getCurrentCharCode_() === ol.expression.Char.EQUAL) {
+        this.increment_(1);
+        return {
+          type: ol.expression.TokenType.PUNCTUATOR,
+          value: String.fromCharCode(ch) + '=='
+        };
+      }
+
+      // != or ==
+      return {
+        type: ol.expression.TokenType.PUNCTUATOR,
+        value: String.fromCharCode(ch) + '='
+      };
+    }
+
+    if (ch === ol.expression.Char.GREATER || ch === ol.expression.Char.LESS) {
+      // >= or <=; both characters are consumed
+      this.increment_(2);
+      return {
+        type: ol.expression.TokenType.PUNCTUATOR,
+        value: String.fromCharCode(ch) + '='
+      };
+    }
+  }
+
+  // remaining 2-character punctuators are || and &&
+  if (ch === ch1 &&
+      (ch === ol.expression.Char.PIPE || ch === ol.expression.Char.AMPERSAND)) {
+
+    this.increment_(2);
+    var str = String.fromCharCode(ch);
+    return {
+      type: ol.expression.TokenType.PUNCTUATOR,
+      value: str + str
+    };
+  }
+
+  // single char punctuation (a lone ! is the logical not operator; a lone =
+  // is deliberately missing because assignment is not allowed)
+  if (ch === ol.expression.Char.DOT ||
+      ch === ol.expression.Char.LEFT_PAREN ||
+      ch === ol.expression.Char.RIGHT_PAREN ||
+      ch === ol.expression.Char.COMMA ||
+      ch === ol.expression.Char.GREATER ||
+      ch === ol.expression.Char.LESS ||
+      ch === ol.expression.Char.BANG ||
+      ch === ol.expression.Char.PLUS ||
+      ch === ol.expression.Char.MINUS ||
+      ch === ol.expression.Char.STAR ||
+      ch === ol.expression.Char.PERCENT ||
+      ch === ol.expression.Char.PIPE ||
+      ch === ol.expression.Char.AMPERSAND ||
+      ch === ol.expression.Char.TILDE) {
+
+    this.increment_(1);
+    return {
+      type: ol.expression.TokenType.PUNCTUATOR,
+      value: String.fromCharCode(ch)
+    };
+  }
+
+  // we don't allow 4-character punctuator (>>>=)
+  // and the allowed 3-character 
punctuators (!==, ===) are already consumed + + throw new Error('Unexpected token at index ' + this.index_ + + ': ' + String.fromCharCode(ch)); +}; + + +/** + * Scan identifier token. Not yet implemented: currently always throws. + * @return {ol.expression.Token} Identifier token. + * @private + */ +ol.expression.Lexer.prototype.scanIdentifier_ = function() { + throw new Error('Not yet implemented'); +}; + + +/** + * Scan numeric literal token. Not yet implemented: currently always throws. + * @return {ol.expression.Token} Numeric literal token. + * @private + */ +ol.expression.Lexer.prototype.scanNumericLiteral_ = function() { + throw new Error('Not yet implemented'); +}; + + +/** + * Scan string literal token. Not yet implemented: currently always throws. + * @return {ol.expression.Token} String literal token. + * @private + */ +ol.expression.Lexer.prototype.scanStringLiteral_ = function() { + throw new Error('Not yet implemented'); +}; + + +/** + * Peek at the next token, but don't advance the index. + * @return {ol.expression.Token} The upcoming token. + * @private + */ +ol.expression.Lexer.prototype.peek_ = function() { + var currentIndex = this.index_; + var token = this.advance_(); + this.index_ = currentIndex; + return token; +}; + + +/** + * Tokenize the provided code. Currently a stub that returns an empty array. + * @return {Array.<ol.expression.Token>} Tokens. + */ +ol.expression.Lexer.prototype.tokenize = function() { + return []; +}; diff --git a/test/spec/ol/expression/lexer.test.js b/test/spec/ol/expression/lexer.test.js new file mode 100644 index 0000000000..91040c928a --- /dev/null +++ b/test/spec/ol/expression/lexer.test.js @@ -0,0 +1,14 @@ +goog.provide('ol.test.expression.Lexer'); + +describe('ol.expression.Lexer', function() { + + describe('constructor', function() { + it('creates a new lexer', function() { + var lexer = new ol.expression.Lexer('foo'); + expect(lexer).to.be.a(ol.expression.Lexer); + }); + }); + +}); + +goog.require('ol.expression.Lexer');