litjson/litjson/Lexer.cs

854 lines
19 KiB
C#

#region Header
/**
* Lexer.cs
* JSON lexer implementation based on a finite state machine.
*
* The authors disclaim copyright to this source code. For more details, see
* the COPYING file included with this distribution.
**/
#endregion
using System;
using System.IO;
using System.Text;
namespace LitJson {
internal class FsmContext {
public Boolean Return;
public Int32 NextState;
public Lexer L;
public Int32 StateStack;
}
internal class Lexer {
#region Fields
private delegate Boolean StateHandler(FsmContext ctx);
private static readonly Int32[] fsm_return_table;
private static readonly StateHandler[] fsm_handler_table;
private readonly FsmContext fsm_context;
private Int32 input_buffer;
private Int32 input_char;
private readonly TextReader reader;
private Int32 state;
private readonly StringBuilder string_buffer;
private Int32 unichar;
#endregion
#region Properties
public Boolean AllowComments { get; set; }
public Boolean AllowSingleQuotedStrings { get; set; }
public Boolean EndOfInput { get; private set; }
public Int32 Token { get; private set; }
public String StringValue { get; private set; }
#endregion
#region Constructors
static Lexer() => PopulateFsmTables(out fsm_handler_table, out fsm_return_table);
public Lexer(TextReader reader) {
this.AllowComments = true;
this.AllowSingleQuotedStrings = true;
this.input_buffer = 0;
this.string_buffer = new StringBuilder(128);
this.state = 1;
this.EndOfInput = false;
this.reader = reader;
this.fsm_context = new FsmContext {
L = this
};
}
#endregion
#region Static Methods
private static Int32 HexValue(Int32 digit) {
switch(digit) {
case 'a':
case 'A':
return 10;
case 'b':
case 'B':
return 11;
case 'c':
case 'C':
return 12;
case 'd':
case 'D':
return 13;
case 'e':
case 'E':
return 14;
case 'f':
case 'F':
return 15;
default:
return digit - '0';
}
}
private static void PopulateFsmTables(out StateHandler[] fsm_handler_table, out Int32[] fsm_return_table) {
// See section A.1. of the manual for details of the finite
// state machine.
fsm_handler_table = new StateHandler[28] {
State1,
State2,
State3,
State4,
State5,
State6,
State7,
State8,
State9,
State10,
State11,
State12,
State13,
State14,
State15,
State16,
State17,
State18,
State19,
State20,
State21,
State22,
State23,
State24,
State25,
State26,
State27,
State28
};
fsm_return_table = new Int32[28] {
(Int32) ParserToken.Char,
0,
(Int32) ParserToken.Number,
(Int32) ParserToken.Number,
0,
(Int32) ParserToken.Number,
0,
(Int32) ParserToken.Number,
0,
0,
(Int32) ParserToken.True,
0,
0,
0,
(Int32) ParserToken.False,
0,
0,
(Int32) ParserToken.Null,
(Int32) ParserToken.CharSeq,
(Int32) ParserToken.Char,
0,
0,
(Int32) ParserToken.CharSeq,
(Int32) ParserToken.Char,
0,
0,
0,
0
};
}
private static Char ProcessEscChar(Int32 esc_char) {
switch(esc_char) {
case '"':
case '\'':
case '\\':
case '/':
return Convert.ToChar(esc_char);
case 'n':
return '\n';
case 't':
return '\t';
case 'r':
return '\r';
case 'b':
return '\b';
case 'f':
return '\f';
default:
// Unreachable
return '?';
}
}
private static Boolean State1(FsmContext ctx) {
while(ctx.L.GetChar()) {
if(ctx.L.input_char == ' ' ||
ctx.L.input_char >= '\t' && ctx.L.input_char <= '\r') {
continue;
}
if(ctx.L.input_char >= '1' && ctx.L.input_char <= '9') {
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 3;
return true;
}
switch(ctx.L.input_char) {
case '"':
ctx.NextState = 19;
ctx.Return = true;
return true;
case ',':
case ':':
case '[':
case ']':
case '{':
case '}':
ctx.NextState = 1;
ctx.Return = true;
return true;
case '-':
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 2;
return true;
case '0':
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 4;
return true;
case 'f':
ctx.NextState = 12;
return true;
case 'n':
ctx.NextState = 16;
return true;
case 't':
ctx.NextState = 9;
return true;
case '\'':
if(!ctx.L.AllowSingleQuotedStrings) {
return false;
}
ctx.L.input_char = '"';
ctx.NextState = 23;
ctx.Return = true;
return true;
case '/':
if(!ctx.L.AllowComments) {
return false;
}
ctx.NextState = 25;
return true;
default:
return false;
}
}
return true;
}
private static Boolean State2(FsmContext ctx) {
_ = ctx.L.GetChar();
if(ctx.L.input_char >= '1' && ctx.L.input_char <= '9') {
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 3;
return true;
}
switch(ctx.L.input_char) {
case '0':
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 4;
return true;
default:
return false;
}
}
private static Boolean State3(FsmContext ctx) {
while(ctx.L.GetChar()) {
if(ctx.L.input_char >= '0' && ctx.L.input_char <= '9') {
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
continue;
}
if(ctx.L.input_char == ' ' ||
ctx.L.input_char >= '\t' && ctx.L.input_char <= '\r') {
ctx.Return = true;
ctx.NextState = 1;
return true;
}
switch(ctx.L.input_char) {
case ',':
case ']':
case '}':
ctx.L.UngetChar();
ctx.Return = true;
ctx.NextState = 1;
return true;
case '.':
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 5;
return true;
case 'e':
case 'E':
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 7;
return true;
default:
return false;
}
}
return true;
}
private static Boolean State4(FsmContext ctx) {
_ = ctx.L.GetChar();
if(ctx.L.input_char == ' ' ||
ctx.L.input_char >= '\t' && ctx.L.input_char <= '\r') {
ctx.Return = true;
ctx.NextState = 1;
return true;
}
switch(ctx.L.input_char) {
case ',':
case ']':
case '}':
ctx.L.UngetChar();
ctx.Return = true;
ctx.NextState = 1;
return true;
case '.':
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 5;
return true;
case 'e':
case 'E':
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 7;
return true;
default:
return false;
}
}
private static Boolean State5(FsmContext ctx) {
_ = ctx.L.GetChar();
if(ctx.L.input_char >= '0' && ctx.L.input_char <= '9') {
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 6;
return true;
}
return false;
}
private static Boolean State6(FsmContext ctx) {
while(ctx.L.GetChar()) {
if(ctx.L.input_char >= '0' && ctx.L.input_char <= '9') {
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
continue;
}
if(ctx.L.input_char == ' ' ||
ctx.L.input_char >= '\t' && ctx.L.input_char <= '\r') {
ctx.Return = true;
ctx.NextState = 1;
return true;
}
switch(ctx.L.input_char) {
case ',':
case ']':
case '}':
ctx.L.UngetChar();
ctx.Return = true;
ctx.NextState = 1;
return true;
case 'e':
case 'E':
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 7;
return true;
default:
return false;
}
}
return true;
}
private static Boolean State7(FsmContext ctx) {
_ = ctx.L.GetChar();
if(ctx.L.input_char >= '0' && ctx.L.input_char <= '9') {
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 8;
return true;
}
switch(ctx.L.input_char) {
case '+':
case '-':
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
ctx.NextState = 8;
return true;
default:
return false;
}
}
private static Boolean State8(FsmContext ctx) {
while(ctx.L.GetChar()) {
if(ctx.L.input_char >= '0' && ctx.L.input_char <= '9') {
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
continue;
}
if(ctx.L.input_char == ' ' ||
ctx.L.input_char >= '\t' && ctx.L.input_char <= '\r') {
ctx.Return = true;
ctx.NextState = 1;
return true;
}
switch(ctx.L.input_char) {
case ',':
case ']':
case '}':
ctx.L.UngetChar();
ctx.Return = true;
ctx.NextState = 1;
return true;
default:
return false;
}
}
return true;
}
private static Boolean State9(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case 'r':
ctx.NextState = 10;
return true;
default:
return false;
}
}
private static Boolean State10(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case 'u':
ctx.NextState = 11;
return true;
default:
return false;
}
}
private static Boolean State11(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case 'e':
ctx.Return = true;
ctx.NextState = 1;
return true;
default:
return false;
}
}
private static Boolean State12(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case 'a':
ctx.NextState = 13;
return true;
default:
return false;
}
}
private static Boolean State13(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case 'l':
ctx.NextState = 14;
return true;
default:
return false;
}
}
private static Boolean State14(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case 's':
ctx.NextState = 15;
return true;
default:
return false;
}
}
private static Boolean State15(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case 'e':
ctx.Return = true;
ctx.NextState = 1;
return true;
default:
return false;
}
}
private static Boolean State16(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case 'u':
ctx.NextState = 17;
return true;
default:
return false;
}
}
private static Boolean State17(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case 'l':
ctx.NextState = 18;
return true;
default:
return false;
}
}
private static Boolean State18(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case 'l':
ctx.Return = true;
ctx.NextState = 1;
return true;
default:
return false;
}
}
private static Boolean State19(FsmContext ctx) {
while(ctx.L.GetChar()) {
switch(ctx.L.input_char) {
case '"':
ctx.L.UngetChar();
ctx.Return = true;
ctx.NextState = 20;
return true;
case '\\':
ctx.StateStack = 19;
ctx.NextState = 21;
return true;
default:
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
continue;
}
}
return true;
}
private static Boolean State20(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case '"':
ctx.Return = true;
ctx.NextState = 1;
return true;
default:
return false;
}
}
private static Boolean State21(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case 'u':
ctx.NextState = 22;
return true;
case '"':
case '\'':
case '/':
case '\\':
case 'b':
case 'f':
case 'n':
case 'r':
case 't':
_ = ctx.L.string_buffer.Append(
ProcessEscChar(ctx.L.input_char));
ctx.NextState = ctx.StateStack;
return true;
default:
return false;
}
}
private static Boolean State22(FsmContext ctx) {
Int32 counter = 0;
Int32 mult = 4096;
ctx.L.unichar = 0;
while(ctx.L.GetChar()) {
if(ctx.L.input_char >= '0' && ctx.L.input_char <= '9' ||
ctx.L.input_char >= 'A' && ctx.L.input_char <= 'F' ||
ctx.L.input_char >= 'a' && ctx.L.input_char <= 'f') {
ctx.L.unichar += HexValue(ctx.L.input_char) * mult;
counter++;
mult /= 16;
if(counter == 4) {
_ = ctx.L.string_buffer.Append(
Convert.ToChar(ctx.L.unichar));
ctx.NextState = ctx.StateStack;
return true;
}
continue;
}
return false;
}
return true;
}
private static Boolean State23(FsmContext ctx) {
while(ctx.L.GetChar()) {
switch(ctx.L.input_char) {
case '\'':
ctx.L.UngetChar();
ctx.Return = true;
ctx.NextState = 24;
return true;
case '\\':
ctx.StateStack = 23;
ctx.NextState = 21;
return true;
default:
_ = ctx.L.string_buffer.Append((Char)ctx.L.input_char);
continue;
}
}
return true;
}
private static Boolean State24(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case '\'':
ctx.L.input_char = '"';
ctx.Return = true;
ctx.NextState = 1;
return true;
default:
return false;
}
}
private static Boolean State25(FsmContext ctx) {
_ = ctx.L.GetChar();
switch(ctx.L.input_char) {
case '*':
ctx.NextState = 27;
return true;
case '/':
ctx.NextState = 26;
return true;
default:
return false;
}
}
private static Boolean State26(FsmContext ctx) {
while(ctx.L.GetChar()) {
if(ctx.L.input_char == '\n') {
ctx.NextState = 1;
return true;
}
}
return true;
}
private static Boolean State27(FsmContext ctx) {
while(ctx.L.GetChar()) {
if(ctx.L.input_char == '*') {
ctx.NextState = 28;
return true;
}
}
return true;
}
private static Boolean State28(FsmContext ctx) {
while(ctx.L.GetChar()) {
if(ctx.L.input_char == '*') {
continue;
}
if(ctx.L.input_char == '/') {
ctx.NextState = 1;
return true;
}
ctx.NextState = 27;
return true;
}
return true;
}
#endregion
private Boolean GetChar() {
if((this.input_char = this.NextChar()) != -1) {
return true;
}
this.EndOfInput = true;
return false;
}
private Int32 NextChar() {
if(this.input_buffer != 0) {
Int32 tmp = this.input_buffer;
this.input_buffer = 0;
return tmp;
}
return this.reader.Read();
}
public Boolean NextToken() {
StateHandler handler;
this.fsm_context.Return = false;
while(true) {
handler = fsm_handler_table[this.state - 1];
if(!handler(this.fsm_context)) {
throw new JsonException(this.input_char);
}
if(this.EndOfInput) {
return false;
}
if(this.fsm_context.Return) {
this.StringValue = this.string_buffer.ToString();
_ = this.string_buffer.Remove(0, this.string_buffer.Length);
this.Token = fsm_return_table[this.state - 1];
if(this.Token == (Int32)ParserToken.Char) {
this.Token = this.input_char;
}
this.state = this.fsm_context.NextState;
return true;
}
this.state = this.fsm_context.NextState;
}
}
private void UngetChar() => this.input_buffer = this.input_char;
}
}