Building an SQL tokenizer in Flex
This post is an introduction to the next post, which I am going to publish within the next few days. It describes one of the features of MySQL on AIR: the SQL code tokenizer used for code coloring.
For those who have no idea what a tokenizer is, there is a great introduction on Wikipedia. In a few words, a tokenizer classifies parts of the input data by splitting it into smaller chunks. Those chunks may later be used in a variety of ways, for example to implement code coloring. This is what I am going to show below.
I am starting with a bunch of uninteresting classes which are going to be used by the tool later: a list of MySQL functions and a list of MySQL keywords. The tool is aware of which is which.
[source lang='as3']
package uk.co.riait.sql {

    /**
     * Lookup table of MySQL function names used by the SQL tokenizer.
     */
    public class SQLFunctions {

        /**
         * MySQL function names in upper case, grouped by initial letter.
         * Only the presence of a key matters (it is tested with
         * hasOwnProperty()); the value 1 is a placeholder.
         * Each name must be listed exactly once.
         */
        public static const functions:Object = {
            // A
            ABS: 1, ACOS: 1, ADDDATE: 1, ADDTIME: 1, AES_DECRYPT: 1, AES_ENCRYPT: 1,
            ASCII: 1, ANALYSE: 1, AND: 1, ASIN: 1, ATAN2: 1, ATAN: 1, AVG: 1,
            // B
            BENCHMARK: 1, BETWEEN: 1, BIN: 1, BINARY: 1, BIT_AND: 1, BIT_COUNT: 1,
            BIT_LENGTH: 1, BIT_OR: 1, BIT_XOR: 1,
            // C
            CASE: 1, CAST: 1, CEIL: 1, CEILING: 1, CHAR_LENGTH: 1, CHAR: 1,
            CHARACTER_LENGTH: 1, CHARSET: 1, COALESCE: 1, COERCIBILITY: 1,
            COLLATION: 1, COMPRESS: 1, CONCAT_WS: 1, CONCAT: 1, CONNECTION_ID: 1,
            CONV: 1, CONVERT_TZ: 1, CONVERT: 1, COS: 1, COT: 1, COUNT: 1, CRC32: 1,
            CURDATE: 1, CURRENT_DATE: 1, CURRENT_TIME: 1, CURRENT_TIMESTAMP: 1,
            CURRENT_USER: 1, CURTIME: 1,
            // D
            DATABASE: 1, DATE_ADD: 1, DATE_FORMAT: 1, DATE_SUB: 1, DATE: 1,
            DATEDIFF: 1, DAY: 1, DAYNAME: 1, DAYOFMONTH: 1, DAYOFWEEK: 1,
            DAYOFYEAR: 1, DECODE: 1, DEFAULT: 1, DEGREES: 1, DES_DECRYPT: 1,
            DES_ENCRYPT: 1, DIV: 1,
            // E
            ELT: 1, ENCODE: 1, ENCRYPT: 1, EXP: 1, EXPORT_SET: 1, EXTRACT: 1,
            EXTRACTVALUE: 1,
            // F
            FIELD: 1, FIND_IN_SET: 1, FLOOR: 1, FORMAT: 1, FOUND_ROWS: 1,
            FROM_DAYS: 1, FROM_UNIXTIME: 1,
            // G
            GET_FORMAT: 1, GET_LOCK: 1, GREATEST: 1, GROUP_CONCAT: 1,
            // H
            HEX: 1, HOUR: 1,
            // I
            IF: 1, IFNULL: 1, IN: 1, INET_ATON: 1, INET_NTOA: 1, INSERT: 1,
            INSTR: 1, INTERVAL: 1, IS_FREE_LOCK: 1, IS_USED_LOCK: 1, IS: 1,
            ISNULL: 1,
            // L
            LAST_DAY: 1, LAST_INSERT_ID: 1, LCASE: 1, LEAST: 1, LEFT: 1, LENGTH: 1,
            LIKE: 1, LN: 1, LOAD_FILE: 1, LOCALTIME: 1, LOCALTIMESTAMP: 1,
            LOCATE: 1, LOG10: 1, LOG2: 1, LOG: 1, LOWER: 1, LPAD: 1, LTRIM: 1,
            // M
            MAKE_SET: 1, MAKEDATE: 1, MAKETIME: 1, MASTER_POS_WAIT: 1, MATCH: 1,
            MAX: 1, MD5: 1, MICROSECOND: 1, MID: 1, MIN: 1, MINUTE: 1, MOD: 1,
            MONTH: 1, MONTHNAME: 1,
            // N
            NAME_CONST: 1, NOT: 1, NOW: 1, NULLIF: 1,
            // O
            OCT: 1, OCTET_LENGTH: 1, OLD_PASSWORD: 1, ORD: 1,
            // P
            PASSWORD: 1, PERIOD_ADD: 1, PERIOD_DIFF: 1, PI: 1, POSITION: 1,
            POW: 1, POWER: 1,
            // Q
            QUARTER: 1, QUOTE: 1,
            // R
            RADIANS: 1, RAND: 1, REGEXP: 1, RELEASE_LOCK: 1, REPEAT: 1, REPLACE: 1,
            REVERSE: 1, RIGHT: 1, RLIKE: 1, ROUND: 1, ROW_COUNT: 1, RPAD: 1,
            RTRIM: 1,
            // S
            SCHEMA: 1, SEC_TO_TIME: 1, SECOND: 1, SESSION_USER: 1, SHA1: 1, SHA: 1,
            SIGN: 1, SIN: 1, SLEEP: 1, SOUNDEX: 1, SOUNDS: 1, SPACE: 1, SQRT: 1,
            STD: 1, STDDEV_POP: 1, STDDEV_SAMP: 1, STDDEV: 1, STR_TO_DATE: 1,
            STRCMP: 1, SUBDATE: 1, SUBSTR: 1, SUBSTRING_INDEX: 1, SUBSTRING: 1,
            SUBTIME: 1, SUM: 1, SYSDATE: 1, SYSTEM_USER: 1,
            // T
            TAN: 1, TIME_FORMAT: 1, TIME_TO_SEC: 1, TIME: 1, TIMEDIFF: 1,
            TIMESTAMP: 1, TIMESTAMPADD: 1, TIMESTAMPDIFF: 1, TO_DAYS: 1, TRIM: 1,
            TRUNCATE: 1,
            // U
            UCASE: 1, UNCOMPRESS: 1, UNCOMPRESSED_LENGTH: 1, UNHEX: 1,
            UNIX_TIMESTAMP: 1, UPDATEXML: 1, UPPER: 1, USER: 1, UTC_DATE: 1,
            UTC_TIME: 1, UTC_TIMESTAMP: 1, UUID_SHORT: 1, UUID: 1,
            // V
            VALUES: 1, VAR_POP: 1, VAR_SAMP: 1, VARIANCE: 1, VERSION: 1,
            // W
            WEEK: 1, WEEKDAY: 1, WEEKOFYEAR: 1,
            // X, Y
            XOR: 1, YEAR: 1, YEARWEEK: 1
        };
    }
}
[/source]
More boring stuff…
[source lang='as3']
package uk.co.riait.sql {

    /**
     * Lookup table of MySQL keywords used by the SQL tokenizer.
     */
    public class SQLKeywords {

        /**
         * MySQL keywords in upper case, grouped by initial letter.
         * Only the presence of a key matters (it is tested with
         * hasOwnProperty()); the value 1 is a placeholder.
         */
        public static const keywords:Object = {
            // A
            ADD: 1, ALL: 1, ALTER: 1, ANALYZE: 1, AND: 1, AS: 1, ASC: 1,
            ASENSITIVE: 1,
            // B
            BEFORE: 1, BETWEEN: 1, BIGINT: 1, BINARY: 1, BLOB: 1, BOTH: 1, BY: 1,
            // C
            CALL: 1, CASCADE: 1, CASE: 1, CHANGE: 1, CHAR: 1, CHARACTER: 1,
            CHECK: 1, COLLATE: 1, COLUMN: 1, CONDITION: 1, CONSTRAINT: 1,
            CONTINUE: 1, CONVERT: 1, CREATE: 1, CROSS: 1, CURRENT_DATE: 1,
            CURRENT_TIME: 1, CURRENT_TIMESTAMP: 1, CURRENT_USER: 1, CURSOR: 1,
            // D
            DATABASE: 1, DATABASES: 1, DAY_HOUR: 1, DAY_MICROSECOND: 1,
            DAY_MINUTE: 1, DAY_SECOND: 1, DEC: 1, DECIMAL: 1, DECLARE: 1,
            DEFAULT: 1, DELAYED: 1, DELETE: 1, DESC: 1, DESCRIBE: 1,
            DETERMINISTIC: 1, DISTINCT: 1, DISTINCTROW: 1, DIV: 1, DOUBLE: 1,
            DROP: 1, DUAL: 1,
            // E
            EACH: 1, ELSE: 1, ELSEIF: 1, ENCLOSED: 1, ESCAPED: 1, EXISTS: 1,
            EXIT: 1, EXPLAIN: 1,
            // F
            FALSE: 1, FETCH: 1, FLOAT: 1, FLOAT4: 1, FLOAT8: 1, FOR: 1, FORCE: 1,
            FOREIGN: 1, FROM: 1, FULLTEXT: 1,
            // G
            GRANT: 1, GROUP: 1,
            // H
            HAVING: 1, HIGH_PRIORITY: 1, HOUR_MICROSECOND: 1, HOUR_MINUTE: 1,
            HOUR_SECOND: 1,
            // I
            IF: 1, IGNORE: 1, IN: 1, INDEX: 1, INFILE: 1, INNER: 1, INOUT: 1,
            INSENSITIVE: 1, INSERT: 1, INT: 1, INT1: 1, INT2: 1, INT3: 1, INT4: 1,
            INT8: 1, INTEGER: 1, INTERVAL: 1, INTO: 1, IS: 1, ITERATE: 1,
            // J, K
            JOIN: 1, KEY: 1, KEYS: 1, KILL: 1,
            // L
            LEADING: 1, LEAVE: 1, LEFT: 1, LIKE: 1, LIMIT: 1, LINES: 1, LOAD: 1,
            LOCALTIME: 1, LOCALTIMESTAMP: 1, LOCK: 1, LONG: 1, LONGBLOB: 1,
            LONGTEXT: 1, LOOP: 1, LOW_PRIORITY: 1,
            // M
            MATCH: 1, MEDIUMBLOB: 1, MEDIUMINT: 1, MEDIUMTEXT: 1, MIDDLEINT: 1,
            MINUTE_MICROSECOND: 1, MINUTE_SECOND: 1, MOD: 1, MODIFIES: 1,
            // N
            NATURAL: 1, NOT: 1, NO_WRITE_TO_BINLOG: 1, NULL: 1, NUMERIC: 1,
            // O
            ON: 1, OPTIMIZE: 1, OPTION: 1, OPTIONALLY: 1, OR: 1, ORDER: 1, OUT: 1,
            OUTER: 1, OUTFILE: 1,
            // P
            PRECISION: 1, PRIMARY: 1, PROCEDURE: 1, PURGE: 1,
            // R
            READ: 1, READS: 1, REAL: 1, REFERENCES: 1, REGEXP: 1, RELEASE: 1,
            RENAME: 1, REPEAT: 1, REPLACE: 1, REQUIRE: 1, RESTRICT: 1, RETURN: 1,
            REVOKE: 1, RIGHT: 1, RLIKE: 1,
            // S
            SCHEMA: 1, SCHEMAS: 1, SECOND_MICROSECOND: 1, SELECT: 1, SENSITIVE: 1,
            SEPARATOR: 1, SET: 1, SHOW: 1, SMALLINT: 1, SONAME: 1, SPATIAL: 1,
            SPECIFIC: 1, SQL: 1, SQLEXCEPTION: 1, SQLSTATE: 1, SQLWARNING: 1,
            SQL_BIG_RESULT: 1, SQL_CALC_FOUND_ROWS: 1, SQL_SMALL_RESULT: 1, SSL: 1,
            STARTING: 1, STRAIGHT_JOIN: 1,
            // T
            TABLE: 1, TERMINATED: 1, THEN: 1, TINYBLOB: 1, TINYINT: 1, TINYTEXT: 1,
            TO: 1, TRAILING: 1, TRIGGER: 1, TRUE: 1,
            // U
            UNDO: 1, UNION: 1, UNIQUE: 1, UNLOCK: 1, UNSIGNED: 1, UPDATE: 1,
            USAGE: 1, USE: 1, USING: 1, UTC_DATE: 1, UTC_TIME: 1, UTC_TIMESTAMP: 1,
            // V
            VALUES: 1, VARBINARY: 1, VARCHAR: 1, VARCHARACTER: 1, VARYING: 1,
            // W
            WHEN: 1, WHERE: 1, WHILE: 1, WITH: 1, WRITE: 1,
            // X, Y, Z
            XOR: 1, YEAR_MONTH: 1, ZEROFILL: 1
        }
    }
}
[/source]
These two classes provide all the information required to decide what is a function and what is a keyword. Some names appear in both lists (for example AND, IF or INSERT); that does no harm, because the tokenizer checks the keyword list first, so such a name is reported as a keyword and never as a function.
I am using simple objects because I can simply match a keyword or a function using this code:
[source lang='as3']
// Both tables use upper-case keys, so upper-case the candidate before the
// lookup (the tokenizer's readKey() calls name.toUpperCase() first).
// Each call returns true when the name is present in the table:
SQLKeywords.keywords.hasOwnProperty("TOKEN_I_AM_LOOKING_FOR");
SQLFunctions.functions.hasOwnProperty("FUNCTION_I_AM_LOOKING_FOR");
[/source]
Next class provides additional information about colors:
[source lang='as3']
package uk.co.riait.sql {
// Colour table used when colouring tokens.
public class SQLColorUtil {
// Token colours as 0xRRGGBB values. Each key is a token type id; the names
// in the trailing comments are the matching SQLTokenType constants.
public static var sqlColors:Object = {
1: 0x3B83BF, // KEYWORD
2: 0xB22CCF, // FUNCTION
3: 0x65CF2C, // STRING
4: 0xCC0000, // NUMBER
5: 0x000000, // TOKEN
6: 0x4EB160, // ESCAPED_STRING
7: 0x000000, // KEY
8: 0x7C0F0F, // VARIABLE
9: 0x999999, // COMMENT
10: 0x000000, // DOT
11: 0x3B83BF // OPERATOR
};
}
}
[/source]
The keys match the token types. Here is the class containing the token type information:
[source lang='as3']
package uk.co.riait.sql {
// Integer ids for the kinds of token the tokenizer emits.
// The same ids are the keys of SQLColorUtil.sqlColors, so the two
// must be kept in step.
public class SQLTokenType {
// a name found in SQLKeywords.keywords:
public static const KEYWORD:int = 1;
// a name found in SQLFunctions.functions:
public static const FUNCTION:int = 2;
// string token:
public static const STRING:int = 3;
// number token:
public static const NUMBER:int = 4;
// plain punctuation token, for example a comma or a parenthesis:
public static const TOKEN:int = 5;
// escaped (quoted) name - wrapped in backticks (``):
public static const ESCAPED_STRING:int = 6;
// any other name, i.e. neither a keyword nor a function:
public static const KEY:int = 7;
// variable name - @example:
public static const VARIABLE:int = 8;
// single or multi line comment:
public static const COMMENT:int = 9;
// dot, required in the future for code completion:
public static const DOT:int = 10;
// any operator:
public static const OPERATOR:int = 11;
}
}
[/source]
Only two classes left before I go to the tokenizer itself. First class represents a token. It contains the value, the type, start position and length of the token. It is used to apply correct TextRange objects in the text area. Second class is the event class dispatched by the tokenizer when it finds a token.
[source lang='as3']
package uk.co.riait.sql {

    /**
     * Read-only description of a single token: its text, its type id
     * (one of the SQLTokenType constants), the position where it starts
     * and how many characters it covers.
     */
    public class SQLToken {

        private var _value:String;
        private var _type:int;
        private var _start:int;
        private var _length:int;

        /**
         * @param value  token text; callers pass null for tokens that are only measured
         * @param type   token type id
         * @param start  start position of the token in the input
         * @param length number of characters the token spans
         */
        public function SQLToken(value:String, type:int, start:int, length:int) {
            _value = value;
            _type = type;
            _start = start;
            _length = length;
        }

        /** Token text as passed to the constructor (may be null). */
        public function get value():String {
            return _value;
        }

        /** Token type id. */
        public function get type():int {
            return _type;
        }

        /** Start position of the token. */
        public function get start():int {
            return _start;
        }

        /** Number of characters covered by the token. */
        public function get length():int {
            return _length;
        }
    }
}
[/source]
[source lang='as3']
package uk.co.riait.sql {

    import flash.events.Event;

    /**
     * Event that carries one SQLToken. The tokenizer dispatches it with the
     * TOKEN_READY type each time it has recognised a token.
     */
    public class SQLTokenizerEvent extends Event {

        /** Event type used for every recognised token. */
        public static const TOKEN_READY:String = "tokenReady";

        private var _token:SQLToken;

        /**
         * @param type       event type, normally TOKEN_READY
         * @param token      the token being reported
         * @param bubbles    passed through to Event
         * @param cancelable passed through to Event
         */
        public function SQLTokenizerEvent(type:String, token:SQLToken,
                                          bubbles:Boolean = false, cancelable:Boolean = false) {
            super(type, bubbles, cancelable);
            this._token = token;
        }

        /** The token carried by this event. */
        public function get token():SQLToken {
            return _token;
        }

        /** Returns a copy of this event that carries the same token. */
        override public function clone():Event {
            var copy:SQLTokenizerEvent = new SQLTokenizerEvent(type, _token, bubbles, cancelable);
            return copy;
        }
    }
}
[/source]
It is time to build the tokenizer. As a test input I am using the following string:
[source lang='sql']
/* this is just a test
multi line comment */
select 2+2 as `expression`, `name`, `surname`
from `my_table` -- single line comment
where `age` != @variable
[/source]
SQLTokenizer class definition with a few private fields comes first.
[source lang='as3']
package uk.co.riait.sql {

    import flash.events.EventDispatcher;

    /**
     * Splits an SQL string into tokens and reports each one by dispatching
     * an event, which is why it extends EventDispatcher.
     */
    public class SQLTokenizer extends EventDispatcher {

        // the text being tokenized; never null:
        private var _input:String;
        // index of the next character next() will read:
        private var _position:int = 0;
        // current character ("" once the input is exhausted):
        private var ch:String;
        // code of the current character, 0 once the input is exhausted:
        private var chCode:int = 0;

        /**
         * @param input SQL text to tokenize. null is treated as an empty
         *              string so the first next() call does not fail on a
         *              null reference.
         */
        public function SQLTokenizer(input:String) {
            _input = (input == null) ? "" : input;
            // load the first character so currentCode() is valid immediately:
            next();
        }
    }
}
[/source]
It extends an EventDispatcher so it can dispatch (emit) tokens once they are found.
Now I am going to discuss each method one by one. The most important ones are parse() and emit(). parse() loops through the input data and identifies tokens. When it finds a more complex one, it calls an appropriate method to process it. There are two groups of these methods. The first group, read*, returns a token containing the data it read. The second group, skip*, returns the number of characters skipped instead (well, not quite right, more details below) and no data, because the data is considered meaningless. emit() just dispatches a new SQLTokenizerEvent.
[source lang='as3']
/**
 * Tokenizes the whole input and emits one token per lexical element.
 *
 * Whitespace between tokens is skipped. The first character of a token
 * decides how the rest is consumed: comments and operators are only
 * measured (their token value is null), while strings, keys, variables and
 * numbers are read into the token value. The loop ends when currentCode()
 * reports 0, the end-of-data marker.
 */
public function parse():void {
    var token:SQLToken;
    var start:int;
    var symbol:String;
    while (true) {
        // skip all whitespace:
        skipIgnored();
        // character code 0 means the end of data is reached:
        if ( currentCode() == 0 ) {
            break;
        }
        // next() leaves _position one past the current character,
        // so the token starts at _position - 1:
        start = _position - 1;
        switch ( currentCode() ) {
            case 47: // /
                if ( preview() == 42 ) { // 42 is *
                    // multi line comment:
                    next(); // step onto the *
                    token = new SQLToken(null, SQLTokenType.COMMENT, start, skipComment());
                } else {
                    // a / that does not open a comment is an operator:
                    token = new SQLToken(null, SQLTokenType.OPERATOR, start, skipOperator());
                }
                break;
            case 38:  // &
            case 126: // ~
            case 124: // |
            case 94:  // ^
            case 61:  // =
            case 62:  // >
            case 60:  // <
            case 37:  // %
            case 33:  // !
            case 43:  // +
            case 42:  // *
                // a run of operator characters:
                token = new SQLToken(null, SQLTokenType.OPERATOR, start, skipOperator());
                break;
            case 40: // (
            case 41: // )
            case 44: // ,
                // remember the symbol before advancing; after next() the
                // ch field already holds the following character:
                symbol = ch;
                next();
                token = new SQLToken(symbol, SQLTokenType.TOKEN, start, 1);
                break;
            case 35: // # single line comment:
                token = new SQLToken(null, SQLTokenType.COMMENT, start, skipUntilNl());
                break;
            case 46: // . for code hints
                symbol = ch;
                next();
                token = new SQLToken(symbol, SQLTokenType.DOT, start, 1);
                break;
            case 64: // @
                token = readVariable(start);
                break;
            case 39: // '
                token = readString(start);
                break;
            case 96: // `
                token = readEscapedKey(start);
                break;
            default:
                // a leading - may start a negative number, a "--" comment or
                // stand alone; readNumber() tells them apart:
                if ( isDigit( currentCode() ) || currentCode() == 45 ) { // 45 is -
                    token = readNumber(start);
                } else {
                    token = readKey(start);
                }
                break;
        }
        // emit the token:
        emit(token);
    }
}
/**
 * Reports a token to listeners by dispatching a TOKEN_READY event.
 *
 * @param token the token to report
 */
private function emit(token:SQLToken):void {
    var event:SQLTokenizerEvent = new SQLTokenizerEvent(SQLTokenizerEvent.TOKEN_READY, token);
    dispatchEvent(event);
}
[/source]
These methods follow emit(), all belong to read* group:
- readString(): reads a MySQL single quoted string
- readKey(): reads a literal, it may be a function name, key name or any other literal
- readVariable(): reads a variable, variable starts with @ and is followed by a literal
- readEscapedKey(): reads a MySQL escaped key, escaped key is surrounded by `
- readNumber(): reads a numeric value
[source lang='as3']
/**
 * Reads a single quoted string. It is called with the current character on
 * the opening quote and returns with the current character just after the
 * closing quote (or at the end of data when the string is not closed).
 *
 * A backslash keeps the character after it inside the string, and a
 * doubled quote ('') is treated as an escaped quote, so neither of them
 * ends the string early.
 *
 * @param start position of the opening quote
 * @return a STRING token whose value includes the surrounding quotes
 */
private function readString(start:int):SQLToken {
    // the opening quote is part of the token:
    var text:String = ch;
    next();
    while ( currentCode() != 0 ) {
        if ( currentCode() == 92 ) { // 92 is \
            // keep the backslash and whatever it escapes:
            text += ch;
            next();
            if ( currentCode() == 0 ) {
                break;
            }
            text += ch;
            next();
            continue;
        }
        if ( currentCode() == 39 ) { // 39 is '
            text += ch;
            next();
            if ( currentCode() == 39 ) {
                // '' inside a string is an escaped quote, keep reading:
                text += ch;
                next();
                continue;
            }
            // that was the closing quote:
            break;
        }
        text += ch;
        next();
    }
    return new SQLToken(text, SQLTokenType.STRING, start, text.length);
}
/**
 * Reads a bare name and classifies it as a keyword, a function or a plain key.
 *
 * Reading stops at whitespace, an operator character, a comma, a
 * parenthesis, a dot or the end of data. Stopping at the dot lets parse()
 * emit "table.column" as KEY, DOT, KEY instead of one key containing the dot.
 *
 * @param start position of the first character of the name
 * @return a KEYWORD, FUNCTION or KEY token holding the name as written
 */
private function readKey(start:int):SQLToken {
    var name:String = "";
    var code:int = currentCode();
    while ( code != 0
            && code != 44 // ,
            && code != 40 // (
            && code != 41 // )
            && code != 46 // .
            && !isWhiteSpace( code )
            && !isOperatorChar( code ) ) {
        name += ch;
        next();
        code = currentCode();
    }
    // the lookup tables hold upper-case names only:
    var lookup:String = name.toUpperCase();
    var type:int = SQLTokenType.KEY;
    // keywords are checked first, so a name listed in both tables is a keyword:
    if ( SQLKeywords.keywords.hasOwnProperty(lookup) ) {
        type = SQLTokenType.KEYWORD;
    } else if ( SQLFunctions.functions.hasOwnProperty(lookup) ) {
        type = SQLTokenType.FUNCTION;
    }
    return new SQLToken(name, type, start, name.length);
}
/**
 * Reads a variable name. It is called with the current character on the
 * leading @, which becomes part of the token value.
 *
 * Reading stops at whitespace, an operator character, a comma, a
 * parenthesis or the end of data.
 *
 * @param start position of the leading @
 * @return a VARIABLE token
 */
private function readVariable(start:int):SQLToken {
    var text:String = "";
    for (;;) {
        var code:int = currentCode();
        var atBoundary:Boolean = code == 0
            || code == 44 // ,
            || code == 40 // (
            || code == 41 // )
            || isWhiteSpace( code )
            || isOperatorChar( code );
        if ( atBoundary ) {
            break;
        }
        text += ch;
        next();
    }
    return new SQLToken(text, SQLTokenType.VARIABLE, start, text.length);
}
/**
 * Reads a backtick-quoted name. It is called with the current character on
 * the opening backtick and reads up to and including the next backtick, or
 * to the end of data when there is none.
 *
 * @param start position of the opening backtick
 * @return an ESCAPED_STRING token whose value includes the backticks
 */
private function readEscapedKey(start:int):SQLToken {
    var text:String = "";
    // take the current character, then keep going until the
    // character we land on is a backtick or the end of data:
    do {
        text += ch;
        next();
    } while ( currentCode() != 96 && currentCode() != 0 ); // 96 is `
    // append the closing backtick (nothing at end of data) and step past it:
    text += ch;
    next();
    return new SQLToken(text, SQLTokenType.ESCAPED_STRING, start, text.length);
}
/**
 * Reads a numeric literal. Because parse() also sends a leading - here, it
 * resolves the two other things a - can start as well.
 *
 * This bit comes from the coreutils JSON class, modified to fit its
 * current usage.
 *
 * Outcomes:
 *  - digits, optional fraction and optional exponent: a NUMBER token with
 *    the text exactly as written;
 *  - "--": a COMMENT token running to the end of the line;
 *  - a - followed by anything else: an OPERATOR token for the minus
 *    itself, like every other operator the tokenizer reports.
 *
 * @param start position of the first character (a digit or -)
 */
private function readNumber(start:int):SQLToken {
    // accumulates the characters of the number:
    var input:String = "";
    // check for a negative number
    if ( currentCode() == 45 ) { // 45 is -
        input += "-";
        next();
    }
    // the number must continue with a digit or a decimal point; this can
    // only fail right after a leading -
    if ( !isDigit( currentCode() ) && currentCode() != 46 ) { // 46 is .
        // a second - means a single line comment in -- notation:
        if ( currentCode() == 45 ) { // 45 is -
            next(); // skip the second -
            // + 2 accounts for the two dashes already consumed:
            return new SQLToken(null, SQLTokenType.COMMENT, start, skipUntilNl() + 2);
        }
        return new SQLToken("-", SQLTokenType.OPERATOR, start, 1);
    }
    // read the integer part
    while ( isDigit( currentCode() ) ) {
        input += ch;
        next();
    }
    // check for a decimal value
    if ( currentCode() == 46 ) { // 46 is .
        input += ".";
        next();
        while ( isDigit( currentCode() ) ) {
            input += ch;
            next();
        }
    }
    // check for scientific notation
    if ( currentCode() == 101 || currentCode() == 69 ) { // 101 is e, 69 is E
        // keep the marker as written so the value matches the input:
        input += ch;
        next();
        // check for sign
        if ( currentCode() == 43 || currentCode() == 45 ) { // 43 is +, 45 is -
            input += ch;
            next();
        }
        // read in the exponent
        while ( isDigit( currentCode() ) ) {
            input += ch;
            next();
        }
    }
    return new SQLToken( input, SQLTokenType.NUMBER, start, input.length );
}
[/source]
Next few methods belong to skip* group.
- skipIgnored(): calls any other method that skips ignored characters, currently skips white spaces only
- skipUntilNl(): skips everything until the end of the line is found, used to skip single line comments
- skipComment(): skips multi line comments by scanning for a */
- skipWhite(): skips any white space
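Two of the above methods, skipUntilNl() and skipComment(), return the number of characters skipped, which is used as the length parameter for the SQLToken constructor. The listing also contains skipOperator(), which skips a run of operator characters and likewise returns how many it skipped. skipIgnored() and skipWhite() do not return anything, because the number of white space characters is not important.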
[source lang='as3']
// Skips input that never becomes a token. At the moment it only delegates
// to skipWhite(); comments are not skipped here, parse() turns them into
// COMMENT tokens.
private function skipIgnored():void {
skipWhite();
}
/**
 * Advances past a run of consecutive operator characters, starting with
 * the current one.
 *
 * @return how many characters were skipped
 */
private function skipOperator():int {
    var count:int = 0;
    while ( isOperatorChar( currentCode() ) ) {
        next();
        count++;
    }
    return count;
}
/**
 * Advances until the current character is a line feed, a carriage return
 * or the end of data. The line break itself is not consumed.
 *
 * @return how many characters were skipped, counting the character that
 *         was current on entry
 */
private function skipUntilNl():int {
    var count:int = 0;
    for (;;) {
        var code:int = currentCode();
        if ( code == 10 || code == 13 || code == 0 ) { // \n, \r, end of data
            break;
        }
        next();
        count++;
    }
    return count;
}
/**
 * Skips a multi line comment. It is called with the current character on
 * the * of the opening "/*" and returns with the current character just
 * after the closing "*" "/" pair, or at the end of data when the comment
 * is never closed.
 *
 * @return the length of the whole comment including its delimiters. For an
 *         unterminated comment this is the number of characters actually
 *         present, so the length never reaches past the end of the input.
 */
private function skipComment():int {
    // the opening / and * are already behind us:
    var skipped:int = 2;
    while ( true ) {
        next();
        if ( currentCode() == 0 ) {
            // end of data inside the comment: nothing more to count
            break;
        }
        skipped++;
        // is the current character * and the next one / ?
        if ( currentCode() == 42 && preview() == 47 ) { // 42 is *, 47 is /
            next(); // step onto the closing /
            next(); // and past it
            skipped++; // count the closing /
            break;
        }
    }
    return skipped;
}
/**
 * Advances past white space: stops as soon as the current character is
 * not a white space character.
 */
private function skipWhite():void {
    for (;;) {
        if ( !isWhiteSpace( currentCode() ) ) {
            return;
        }
        next();
    }
}
[/source]
A few methods which are used to move around and provide more information about the current character.
- currentCode(): returns currently processed character’s code
- next(): advances to the next character in the input, if there is no next character 0 is used which indicates the end of the data
- preview(): this method simply shows what is the next character without progressing, it is used to make the decision of what the current token may be
- prev(): moves one character back
[source lang='as3']
// Returns the code of the current character (the chCode field).
// next() stores 0 there once the input is exhausted, so 0 doubles as the
// end-of-data marker.
public function currentCode():int {
return chCode;
}
/**
 * Makes the character at _position the current one and advances _position
 * past it. When there is no such character the current character becomes
 * "" and its code 0, which marks the end of data.
 *
 * @return the new current character
 */
public function next():String {
    ch = _input.charAt(_position++);
    if ( ch.length == 1 ) {
        chCode = ch.charCodeAt(0);
    } else {
        chCode = 0;
    }
    return ch;
}
/**
 * Peeks at the character after the current one without advancing; used to
 * decide what the current token is.
 *
 * @return the code of the character at _position, or 0 when there is none
 */
public function preview():int {
    if ( _position < _input.length ) {
        return _input.charCodeAt(_position);
    }
    // past the end charCodeAt() yields NaN, which an int return type
    // turns into 0; spell that out:
    return 0;
}
/**
 * Moves back one character, undoing the most recent next().
 *
 * The tokenizer keeps the current character at _position - 1, so stepping
 * back has to update _position, ch and chCode together; otherwise
 * currentCode() and preview() would disagree about where the cursor is.
 * When the cursor is already on the first character nothing changes.
 *
 * @return the current character after the move
 */
public function prev():String {
    if ( _position > 1 ) {
        _position--;
        ch = _input.charAt(_position - 1);
        chCode = (ch.length == 1) ? ch.charCodeAt(0) : 0;
    }
    return ch;
}
[/source]
Finally, 3 self descriptive methods that test a character to see what character group it belongs to.
[source lang='as3']
/**
 * Tells whether a character code is a tab (9), line feed (10),
 * carriage return (13) or space (32).
 */
private function isWhiteSpace( char:int ):Boolean {
    return char == 9 || char == 10 || char == 13 || char == 32;
}
/**
 * Tells whether a character code is one of the digits 0-9 (codes 48-57).
 */
private function isDigit( char:int ):Boolean {
    if ( char < 48 ) {
        return false;
    }
    return char <= 57;
}
/**
 * Tells whether a character code is one of the operator characters
 * the tokenizer recognises. Note that - (45) is not among them.
 */
private function isOperatorChar(char:int):Boolean {
    switch ( char ) {
        case 33:  // !
        case 37:  // %
        case 38:  // &
        case 42:  // *
        case 43:  // +
        case 47:  // /
        case 60:  // <
        case 61:  // =
        case 62:  // >
        case 94:  // ^
        case 124: // |
        case 126: // ~
            return true;
        default:
            return false;
    }
}
[/source]
And that is all the code needed by the tokenizer. I have prepared a simple demo which can be accessed here.
If you would like to download the code you can do it from here. As part of the MySQL on AIR project it is available under the MIT license.