using System;
using System.Collections.Generic;
using System.Linq;
namespace Unosquare.Swan.Abstractions {
/// <summary>
/// Represents a generic tokenizer.
/// </summary>
/// <remarks>
/// The input is tokenized by the constructors, which call the overridable members
/// <see cref="ValidateInput"/>, <see cref="EvaluateFunctionOrMember"/>,
/// <see cref="ResolveFunctionOrMemberType"/> and (for the single-argument constructor)
/// <see cref="GetDefaultOperators"/>. Those calls happen before the body of a derived
/// constructor runs, so overrides must not depend on state set up there.
/// </remarks>
public abstract class Tokenizer {
  private const Char PeriodChar = '.';
  private const Char CommaChar = ',';
  private const Char StringQuotedChar = '"';
  private const Char OpenFuncChar = '(';
  private const Char CloseFuncChar = ')';
  private const Char NegativeChar = '-';
  private const String OpenFuncStr = "(";

  private readonly List<Operator> _operators = new List<Operator>();

  /// <summary>
  /// Initializes a new instance of the <see cref="Tokenizer"/> class.
  /// This constructor uses the operators returned by <see cref="GetDefaultOperators"/>,
  /// which, unless overridden, are:
  /// <list type="table">
  /// <listheader>
  /// <term>Operator</term>
  /// <description>Precedence</description>
  /// </listheader>
  /// <item>
  /// <term>=</term>
  /// <description>1</description>
  /// </item>
  /// <item>
  /// <term>!=</term>
  /// <description>1</description>
  /// </item>
  /// <item>
  /// <term>&gt;</term>
  /// <description>2</description>
  /// </item>
  /// <item>
  /// <term>&lt;</term>
  /// <description>2</description>
  /// </item>
  /// <item>
  /// <term>&gt;=</term>
  /// <description>2</description>
  /// </item>
  /// <item>
  /// <term>&lt;=</term>
  /// <description>2</description>
  /// </item>
  /// <item>
  /// <term>+</term>
  /// <description>3</description>
  /// </item>
  /// <item>
  /// <term>&amp;</term>
  /// <description>3</description>
  /// </item>
  /// <item>
  /// <term>-</term>
  /// <description>3</description>
  /// </item>
  /// <item>
  /// <term>*</term>
  /// <description>4</description>
  /// </item>
  /// <item>
  /// <term>(backslash)</term>
  /// <description>4</description>
  /// </item>
  /// <item>
  /// <term>/</term>
  /// <description>4</description>
  /// </item>
  /// <item>
  /// <term>^</term>
  /// <description>4</description>
  /// </item>
  /// </list>
  /// </summary>
  /// <param name="input">The input.</param>
  protected Tokenizer(String input) {
    this._operators.AddRange(this.GetDefaultOperators());
    this.Tokenize(input);
  }

  /// <summary>
  /// Initializes a new instance of the <see cref="Tokenizer"/> class.
  /// </summary>
  /// <param name="input">The input.</param>
  /// <param name="operators">The operators to use.</param>
  protected Tokenizer(String input, IEnumerable<Operator> operators) {
    this._operators.AddRange(operators);
    this.Tokenize(input);
  }

  /// <summary>
  /// Gets the tokens.
  /// </summary>
  /// <value>
  /// The tokens, in the order they were read from the input.
  /// </value>
  public List<Token> Tokens { get; } = new List<Token>();

  /// <summary>
  /// Validates the input and returns the start index for the tokenizer.
  /// </summary>
  /// <param name="input">The input.</param>
  /// <param name="startIndex">The index at which tokenizing starts.</param>
  /// <returns><c>true</c> if the input is valid, otherwise <c>false</c>
  /// (in which case nothing is tokenized).</returns>
  public abstract Boolean ValidateInput(String input, out Int32 startIndex);

  /// <summary>
  /// Resolves the type of the function or member.
  /// </summary>
  /// <param name="input">The extracted function or member text.</param>
  /// <returns>The token type.</returns>
  public abstract TokenType ResolveFunctionOrMemberType(String input);

  /// <summary>
  /// Evaluates the function or member. It is consulted only for characters that are
  /// not letters; the default implementation always returns <c>false</c>.
  /// </summary>
  /// <param name="input">The input.</param>
  /// <param name="position">The position.</param>
  /// <returns><c>true</c> if the input is a valid function or variable, otherwise <c>false</c>.</returns>
  public virtual Boolean EvaluateFunctionOrMember(String input, Int32 position) => false;

  /// <summary>
  /// Gets the default operators.
  /// </summary>
  /// <returns>An array with the operators to use for the tokenizer.</returns>
  public virtual Operator[] GetDefaultOperators() => new[]
  {
    new Operator {Name = "=", Precedence = 1},
    new Operator {Name = "!=", Precedence = 1},
    new Operator {Name = ">", Precedence = 2},
    new Operator {Name = "<", Precedence = 2},
    new Operator {Name = ">=", Precedence = 2},
    new Operator {Name = "<=", Precedence = 2},
    new Operator {Name = "+", Precedence = 3},
    new Operator {Name = "&", Precedence = 3},
    new Operator {Name = "-", Precedence = 3},
    new Operator {Name = "*", Precedence = 4},
    new Operator {Name = "/", Precedence = 4},
    new Operator {Name = "\\", Precedence = 4},
    new Operator {Name = "^", Precedence = 4},
  };

  /// <summary>
  /// Reorders <see cref="Tokens"/> with the shunting-yard algorithm.
  /// The sequence is produced lazily, so the exceptions below are raised while
  /// the result is being enumerated.
  /// </summary>
  /// <param name="includeFunctionStopper">if set to <c>true</c>, a <see cref="TokenType.Wall"/>
  /// token is emitted when the opening parenthesis of a function call is reached.</param>
  /// <returns>
  /// Enumerable of the tokens in the reordered sequence.
  /// </returns>
  /// <exception cref="InvalidOperationException">
  /// Wrong token
  /// or
  /// Mismatched parenthesis.
  /// </exception>
  public virtual IEnumerable<Token> ShuntingYard(Boolean includeFunctionStopper = true) {
    Stack<Token> stack = new Stack<Token>();

    foreach(Token tok in this.Tokens) {
      switch(tok.Type) {
        case TokenType.Number:
        case TokenType.Variable:
        case TokenType.String:
          yield return tok;
          break;
        case TokenType.Function:
          stack.Push(tok);
          break;
        case TokenType.Operator:
          // Flush stacked operators that must be applied before the incoming one.
          while(stack.Count > 0 && stack.Peek().Type == TokenType.Operator &&
                this.CompareOperators(tok.Value, stack.Peek().Value)) {
            yield return stack.Pop();
          }
          stack.Push(tok);
          break;
        case TokenType.Comma:
          // An argument separator flushes everything back to the enclosing parenthesis.
          while(stack.Count > 0 && stack.Peek().Type != TokenType.Comma &&
                stack.Peek().Type != TokenType.Parenthesis) {
            yield return stack.Pop();
          }
          break;
        case TokenType.Parenthesis:
          if(tok.Value == OpenFuncStr) {
            if(includeFunctionStopper && stack.Count > 0 && stack.Peek().Type == TokenType.Function) {
              yield return new Token(TokenType.Wall, tok.Value);
            }
            stack.Push(tok);
          } else {
            while(stack.Count > 0 && stack.Peek().Value != OpenFuncStr) {
              yield return stack.Pop();
            }
            // A ")" with no matching "(" used to surface as the generic empty-stack
            // error from Peek(); report it with the documented message instead.
            if(stack.Count == 0) {
              throw new InvalidOperationException("Mismatched parenthesis");
            }
            _ = stack.Pop();
            if(stack.Count > 0 && stack.Peek().Type == TokenType.Function) {
              yield return stack.Pop();
            }
          }
          break;
        default:
          throw new InvalidOperationException("Wrong token");
      }
    }

    while(stack.Count > 0) {
      Token tok = stack.Pop();
      if(tok.Type == TokenType.Parenthesis) {
        throw new InvalidOperationException("Mismatched parenthesis");
      }
      yield return tok;
    }
  }

  /// <summary>
  /// Decides whether the stacked operator <paramref name="op2"/> must be emitted before
  /// the incoming operator <paramref name="op1"/> is pushed.
  /// </summary>
  private static Boolean CompareOperators(Operator op1, Operator op2) => op1.RightAssociative
    ? op1.Precedence < op2.Precedence
    : op1.Precedence <= op2.Precedence;

  /// <summary>
  /// Splits <paramref name="input"/> into tokens and appends them to <see cref="Tokens"/>.
  /// Nothing is added when <see cref="ValidateInput"/> rejects the input.
  /// </summary>
  private void Tokenize(String input) {
    if(!this.ValidateInput(input, out Int32 startIndex)) {
      return;
    }

    for(Int32 i = startIndex; i < input.Length; i++) {
      if(Char.IsWhiteSpace(input, i)) {
        continue;
      }

      Char current = input[i];

      if(current == CommaChar) {
        this.Tokens.Add(new Token(TokenType.Comma, current.ToString()));
        continue;
      }

      if(current == StringQuotedChar) {
        i = this.ExtractString(input, i);
        continue;
      }

      if(Char.IsLetter(current) || this.EvaluateFunctionOrMember(input, i)) {
        i = this.ExtractFunctionOrMember(input, i);
        continue;
      }

      // NOTE(review): a '-' starts a number whenever the previous token is not a Number
      // (so also after a variable or a ')'), and ExtractNumber accepts '-' inside a
      // number, so "1-2" is read as one Number token. Kept as-is; confirm with callers
      // before changing.
      if(Char.IsNumber(current) || (current == NegativeChar && !this.LastTokenIsNumber())) {
        i = this.ExtractNumber(input, i);
        continue;
      }

      if(current == OpenFuncChar || current == CloseFuncChar) {
        this.Tokens.Add(new Token(TokenType.Parenthesis, current.ToString()));
        continue;
      }

      i = this.ExtractOperator(input, i);
    }
  }

  /// <summary>
  /// Tells whether the most recently added token is a <see cref="TokenType.Number"/>.
  /// </summary>
  private Boolean LastTokenIsNumber() =>
    this.Tokens.Count > 0 && this.Tokens[this.Tokens.Count - 1].Type == TokenType.Number;

  /// <summary>
  /// Reads characters starting at <c>i + right</c> until <paramref name="evaluation"/>
  /// returns <c>true</c> or the input ends, and adds the text read as a token.
  /// </summary>
  /// <param name="input">The input.</param>
  /// <param name="i">The index of the first character of the token.</param>
  /// <param name="tokenTypeEvaluation">Maps the extracted text to its token type.</param>
  /// <param name="evaluation">Returns <c>true</c> for the character that ends the token.</param>
  /// <param name="right">Number of leading characters to skip (1 for the opening quote of a string).</param>
  /// <param name="left">Offset added to the returned index; the default -1 makes the
  /// caller's loop increment land on the first unread character.</param>
  /// <returns>The index from which the caller's loop continues.</returns>
  private Int32 ExtractData(
    String input,
    Int32 i,
    Func<String, TokenType> tokenTypeEvaluation,
    Func<Char, Boolean> evaluation,
    Int32 right = 0,
    Int32 left = -1) {
    Int32 start = i + right;
    Int32 charCount = 0;

    for(Int32 j = start; j < input.Length; j++) {
      if(evaluation(input[j])) {
        break;
      }
      charCount++;
    }

    // Without a leading delimiter to skip, an empty match would return i - 1 and the
    // caller would revisit the same character forever (e.g. an input of "." reaching
    // ExtractOperator). Always consume at least that one character.
    if(charCount == 0 && right == 0) {
      charCount = 1;
    }

    // Extract and set the value
    String value = input.SliceLength(start, charCount);
    this.Tokens.Add(new Token(tokenTypeEvaluation(value), value));

    return i + charCount + left;
  }

  /// <summary>
  /// Extracts an operator token starting at <paramref name="i"/>.
  /// </summary>
  private Int32 ExtractOperator(String input, Int32 i) =>
    this.ExtractData(input, i, _ => TokenType.Operator, x => x == OpenFuncChar ||
                                                             x == CommaChar ||
                                                             x == PeriodChar ||
                                                             x == StringQuotedChar ||
                                                             Char.IsWhiteSpace(x) ||
                                                             Char.IsNumber(x));

  /// <summary>
  /// Extracts a function or member token starting at <paramref name="i"/>; its type is
  /// decided by <see cref="ResolveFunctionOrMemberType"/>.
  /// </summary>
  private Int32 ExtractFunctionOrMember(String input, Int32 i) =>
    this.ExtractData(input, i, this.ResolveFunctionOrMemberType, x => x == OpenFuncChar ||
                                                                      x == CloseFuncChar ||
                                                                      x == CommaChar ||
                                                                      Char.IsWhiteSpace(x));

  /// <summary>
  /// Extracts a number token starting at <paramref name="i"/>; digits, periods and
  /// minus signs are all accepted as part of the number.
  /// </summary>
  private Int32 ExtractNumber(String input, Int32 i) =>
    this.ExtractData(input, i, _ => TokenType.Number,
      x => !Char.IsNumber(x) && x != PeriodChar && x != NegativeChar);

  /// <summary>
  /// Extracts the quoted string whose opening quote is at <paramref name="i"/>.
  /// </summary>
  /// <returns>The index of the closing quote.</returns>
  /// <exception cref="FormatException">The string is not terminated.</exception>
  private Int32 ExtractString(String input, Int32 i) {
    Int32 closingIndex = this.ExtractData(input, i, _ => TokenType.String, x => x == StringQuotedChar, 1, 1);

    // A closing quote always sits at an index below input.Length, so reaching the end
    // means the string was never closed. The previous extra test on the last character
    // let an input that ends with a lone opening quote slip through.
    if(closingIndex >= input.Length) {
      throw new FormatException($"Parser error (Position {i}): Expected '\"' but got '{input[closingIndex - 1]}'.");
    }

    return closingIndex;
  }

  /// <summary>
  /// Compares two operators given by name; see the static overload.
  /// </summary>
  private Boolean CompareOperators(String op1, String op2)
    => CompareOperators(this.GetOperatorOrDefault(op1), this.GetOperatorOrDefault(op2));

  /// <summary>
  /// Finds the registered operator named <paramref name="op"/>, or creates a
  /// placeholder with precedence 0 when it is unknown.
  /// </summary>
  private Operator GetOperatorOrDefault(String op)
    // Token lower-cases operator text, so names are matched ignoring case; otherwise an
    // operator registered with upper-case letters could never be found.
    => this._operators.FirstOrDefault(x => String.Equals(x.Name, op, StringComparison.OrdinalIgnoreCase))
       ?? new Operator { Name = op, Precedence = 0 };
}
/// <summary>
/// Describes an operator by its name, its precedence and its associativity.
/// </summary>
public class Operator {
  /// <summary>
  /// Gets or sets the text that identifies the operator.
  /// </summary>
  /// <value>
  /// The operator name.
  /// </value>
  public String Name { get; set; }

  /// <summary>
  /// Gets or sets the precedence used when this operator is compared with another one.
  /// </summary>
  /// <value>
  /// The operator precedence.
  /// </value>
  public Int32 Precedence { get; set; }

  /// <summary>
  /// Gets or sets a value indicating whether the operator is right associative.
  /// </summary>
  /// <value>
  /// <c>true</c> if the operator is right associative; otherwise, <c>false</c>.
  /// </value>
  public Boolean RightAssociative { get; set; }
}
/// <summary>
/// A single token: a type together with the text it was read from.
/// </summary>
public struct Token {
  /// <summary>
  /// Initializes a new instance of the <see cref="Token"/> struct.
  /// The text of function and operator tokens is stored lower-cased (invariant culture);
  /// every other token keeps its text unchanged.
  /// </summary>
  /// <param name="type">The type.</param>
  /// <param name="value">The value.</param>
  public Token(TokenType type, String value) {
    this.Type = type;

    switch(type) {
      case TokenType.Function:
      case TokenType.Operator:
        this.Value = value.ToLowerInvariant();
        break;
      default:
        this.Value = value;
        break;
    }
  }

  /// <summary>
  /// Gets or sets the type.
  /// </summary>
  /// <value>
  /// The token type.
  /// </value>
  public TokenType Type { get; set; }

  /// <summary>
  /// Gets the value.
  /// </summary>
  /// <value>
  /// The token text, as stored by the constructor.
  /// </value>
  public String Value { get; }
}
/// <summary>
/// Enumerates the kinds of token.
/// </summary>
public enum TokenType {
  /// <summary>
  /// A number.
  /// </summary>
  Number = 0,

  /// <summary>
  /// A string.
  /// </summary>
  String = 1,

  /// <summary>
  /// A variable.
  /// </summary>
  Variable = 2,

  /// <summary>
  /// A function.
  /// </summary>
  Function = 3,

  /// <summary>
  /// A parenthesis.
  /// </summary>
  Parenthesis = 4,

  /// <summary>
  /// An operator.
  /// </summary>
  Operator = 5,

  /// <summary>
  /// A comma.
  /// </summary>
  Comma = 6,

  /// <summary>
  /// The wall, used to specify the end of the argument list of the following function.
  /// </summary>
  Wall = 7,
}
}