2019-12-04 17:10:06 +01:00
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.Linq;
|
|
|
|
|
|
|
|
|
|
namespace Unosquare.Swan.Abstractions {
|
|
|
|
|
/// <summary>
|
|
|
|
|
/// Represents a generic tokenizer.
|
|
|
|
|
/// </summary>
|
|
|
|
|
public abstract class Tokenizer {
|
|
|
|
|
// Single characters that have a special meaning while scanning the input.
private const Char OpenFuncChar = '(';
private const Char CloseFuncChar = ')';
private const Char CommaChar = ',';
private const Char PeriodChar = '.';
private const Char NegativeChar = '-';
private const Char StringQuotedChar = '"';

// String form of the opening parenthesis; compared against token values.
private const String OpenFuncStr = "(";

// Operators known to this tokenizer; populated by the constructors.
private readonly List<Operator> _operators = new List<Operator>();
|
|
|
|
|
|
2019-02-17 14:08:57 +01:00
|
|
|
|
/// <summary>
/// Initializes a new instance of the <see cref="Tokenizer"/> class and tokenizes the input
/// using the operators returned by <see cref="GetDefaultOperators"/>.
/// Unless that method is overridden, the following default operators are used:
/// <list type="table">
/// <listheader>
/// <term>Operator</term>
/// <description>Precedence</description>
/// </listheader>
/// <item>
/// <term>=</term>
/// <description>1</description>
/// </item>
/// <item>
/// <term>!=</term>
/// <description>1</description>
/// </item>
/// <item>
/// <term>&gt;</term>
/// <description>2</description>
/// </item>
/// <item>
/// <term>&lt;</term>
/// <description>2</description>
/// </item>
/// <item>
/// <term>&gt;=</term>
/// <description>2</description>
/// </item>
/// <item>
/// <term>&lt;=</term>
/// <description>2</description>
/// </item>
/// <item>
/// <term>+</term>
/// <description>3</description>
/// </item>
/// <item>
/// <term>&amp;</term>
/// <description>3</description>
/// </item>
/// <item>
/// <term>-</term>
/// <description>3</description>
/// </item>
/// <item>
/// <term>*</term>
/// <description>4</description>
/// </item>
/// <item>
/// <term>/</term>
/// <description>4</description>
/// </item>
/// <item>
/// <term>\ (backslash)</term>
/// <description>4</description>
/// </item>
/// <item>
/// <term>^</term>
/// <description>4</description>
/// </item>
/// </list>
/// </summary>
/// <remarks>
/// The constructor calls the virtual <see cref="GetDefaultOperators"/> and, while tokenizing,
/// the abstract <see cref="ValidateInput"/>; overrides therefore run before the derived
/// constructor body has executed.
/// </remarks>
/// <param name="input">The input.</param>
protected Tokenizer(String input) {
  this._operators.AddRange(this.GetDefaultOperators());
  this.Tokenize(input);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Initializes a new instance of the <see cref="Tokenizer" /> class with a caller-supplied
/// operator set and tokenizes the input.
/// </summary>
/// <param name="input">The input.</param>
/// <param name="operators">The operators to use.</param>
/// <exception cref="ArgumentNullException"><paramref name="operators"/> is <c>null</c>.</exception>
protected Tokenizer(String input, IEnumerable<Operator> operators) {
  // Fail with the caller's parameter name instead of the one reported by List<T>.AddRange.
  if(operators == null) {
    throw new ArgumentNullException(nameof(operators));
  }

  this._operators.AddRange(operators);
  this.Tokenize(input);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Gets the tokens produced from the input, in the order they were read.
/// </summary>
/// <value>
/// The list the tokenizer appends to while scanning; it starts out empty.
/// </value>
public List<Token> Tokens { get; } = new List<Token>();
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Checks the input and reports the index at which tokenizing should begin.
/// When this returns <c>false</c> no tokens are produced.
/// </summary>
/// <param name="input">The input.</param>
/// <param name="startIndex">Receives the index of the first character to scan.</param>
/// <returns><c>true</c> if the input is valid, otherwise <c>false</c>.</returns>
public abstract Boolean ValidateInput(String input, out Int32 startIndex);
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Decides which token type a piece of text extracted as a function or member should get.
/// </summary>
/// <param name="input">The extracted text.</param>
/// <returns>The token type to assign to the new token.</returns>
public abstract TokenType ResolveFunctionOrMemberType(String input);
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Lets a derived tokenizer treat a non-letter character as the start of a function or member.
/// The tokenizer only asks when the character at <paramref name="position"/> is not a letter.
/// The base implementation never accepts anything.
/// </summary>
/// <param name="input">The input.</param>
/// <param name="position">The position.</param>
/// <returns><c>true</c> if the input is a valid function or variable, otherwise <c>false</c>.</returns>
public virtual Boolean EvaluateFunctionOrMember(String input, Int32 position) {
  return false;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Builds the operator set used when no operators are passed to the constructor.
/// </summary>
/// <returns>A newly allocated array with the operators to use for the tokenizer.</returns>
public virtual Operator[] GetDefaultOperators() {
  return new Operator[] {
    // Equality.
    new Operator { Name = "=", Precedence = 1 },
    new Operator { Name = "!=", Precedence = 1 },

    // Relational.
    new Operator { Name = ">", Precedence = 2 },
    new Operator { Name = "<", Precedence = 2 },
    new Operator { Name = ">=", Precedence = 2 },
    new Operator { Name = "<=", Precedence = 2 },

    // Additive and concatenation.
    new Operator { Name = "+", Precedence = 3 },
    new Operator { Name = "&", Precedence = 3 },
    new Operator { Name = "-", Precedence = 3 },

    // Multiplicative and power.
    new Operator { Name = "*", Precedence = 4 },
    new Operator { Name = "/", Precedence = 4 },
    new Operator { Name = "\\", Precedence = 4 },
    new Operator { Name = "^", Precedence = 4 },
  };
}
|
|
|
|
|
|
2019-02-17 14:08:57 +01:00
|
|
|
|
/// <summary>
/// Runs the shunting-yard algorithm over <see cref="Tokens"/> and yields them reordered:
/// operands are emitted as soon as they are read, operators and functions once their
/// operands have been emitted.
/// </summary>
/// <param name="includeFunctionStopper">if set to <c>true</c>, a token of type <c>Wall</c> is
/// emitted when the opening parenthesis of a function call is read.</param>
/// <returns>
/// A lazily evaluated sequence of the reordered tokens; exceptions are raised while enumerating.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Wrong token
/// or
/// Mismatched parenthesis.
/// </exception>
public virtual IEnumerable<Token> ShuntingYard(Boolean includeFunctionStopper = true) {
  Stack<Token> stack = new Stack<Token>();

  foreach(Token tok in this.Tokens) {
    switch(tok.Type) {
      case TokenType.Number:
      case TokenType.Variable:
      case TokenType.String:
        // Operands go straight to the output.
        yield return tok;
        break;

      case TokenType.Function:
        stack.Push(tok);
        break;

      case TokenType.Operator:
        // Flush stacked operators that must be applied before the incoming one.
        while(stack.Count > 0 && stack.Peek().Type == TokenType.Operator &&
              this.CompareOperators(tok.Value, stack.Peek().Value)) {
          yield return stack.Pop();
        }

        stack.Push(tok);
        break;

      case TokenType.Comma:
        // An argument ended: flush everything down to the enclosing parenthesis.
        while(stack.Count > 0 && stack.Peek().Type != TokenType.Comma &&
              stack.Peek().Type != TokenType.Parenthesis) {
          yield return stack.Pop();
        }

        break;

      case TokenType.Parenthesis:
        if(tok.Value == OpenFuncStr) {
          // Mark the start of a function's argument list when requested.
          if(includeFunctionStopper && stack.Count > 0 && stack.Peek().Type == TokenType.Function) {
            yield return new Token(TokenType.Wall, tok.Value);
          }

          stack.Push(tok);
        } else {
          while(stack.Count > 0 && stack.Peek().Value != OpenFuncStr) {
            yield return stack.Pop();
          }

          // A closing parenthesis without a matching opening one used to fail inside
          // Stack<T>.Peek with a generic "stack empty" error; report it explicitly.
          if(stack.Count == 0) {
            throw new InvalidOperationException("Mismatched parenthesis");
          }

          // Drop the opening parenthesis itself.
          _ = stack.Pop();

          // If the group was a function call, emit the function after its arguments.
          if(stack.Count > 0 && stack.Peek().Type == TokenType.Function) {
            yield return stack.Pop();
          }
        }

        break;

      default:
        throw new InvalidOperationException("Wrong token");
    }
  }

  // Whatever is left must be operators or functions; a parenthesis here was never closed.
  while(stack.Count > 0) {
    Token tok = stack.Pop();
    if(tok.Type == TokenType.Parenthesis) {
      throw new InvalidOperationException("Mismatched parenthesis");
    }

    yield return tok;
  }
}
|
|
|
|
|
|
|
|
|
|
// Tells whether op2 (the operator on top of the stack) has to be emitted before
// op1 (the incoming operator) is pushed: a right-associative op1 yields only to a
// strictly higher precedence, any other op1 also yields to an equal precedence.
private static Boolean CompareOperators(Operator op1, Operator op2) {
  if(op1.RightAssociative) {
    return op1.Precedence < op2.Precedence;
  }

  return op1.Precedence <= op2.Precedence;
}
|
|
|
|
|
|
|
|
|
|
// Scans the input from the index reported by ValidateInput and appends one token per
// recognized element to Tokens. Does nothing when ValidateInput rejects the input.
private void Tokenize(String input) {
  if(!this.ValidateInput(input, out Int32 startIndex)) {
    return;
  }

  for(Int32 position = startIndex; position < input.Length; position++) {
    if(Char.IsWhiteSpace(input, position)) {
      continue;
    }

    Char current = input[position];

    if(current == CommaChar) {
      this.Tokens.Add(new Token(TokenType.Comma, current.ToString()));
    } else if(current == StringQuotedChar) {
      // The extractors return the index of the last character they consumed;
      // the loop increment then moves past it.
      position = this.ExtractString(input, position);
    } else if(Char.IsLetter(input, position) || this.EvaluateFunctionOrMember(input, position)) {
      position = this.ExtractFunctionOrMember(input, position);
    } else if(Char.IsNumber(input, position) ||
              current == NegativeChar &&
              (this.Tokens.Count == 0 || this.Tokens[this.Tokens.Count - 1].Type != TokenType.Number)) {
      // NOTE(review): a '-' is handed to the number extractor whenever the previous
      // token is not a Number (for example after a variable or a closing
      // parenthesis) - confirm this is intended.
      position = this.ExtractNumber(input, position);
    } else if(current == OpenFuncChar || current == CloseFuncChar) {
      this.Tokens.Add(new Token(TokenType.Parenthesis, current.ToString()));
    } else {
      // Anything else is read as an operator.
      position = this.ExtractOperator(input, position);
    }
  }
}
|
|
|
|
|
|
|
|
|
|
// Shared extraction routine. Starting at i + right, counts characters until
// `evaluation` returns true for one (or the input ends), adds a token holding that text
// with the type chosen by `tokenTypeEvaluation`, and returns i + count + left.
// With the default left of -1 the result is the index of the last counted character;
// note that a count of zero then yields i - 1.
private Int32 ExtractData(
  String input,
  Int32 i,
  Func<String, TokenType> tokenTypeEvaluation,
  Func<Char, Boolean> evaluation,
  Int32 right = 0,
  Int32 left = -1) {
  Int32 start = i + right;
  Int32 end = start;

  // Advance until the stop predicate fires or the input is exhausted.
  while(end < input.Length && !evaluation(input[end])) {
    end++;
  }

  Int32 charCount = end - start;

  // SliceLength is declared outside this file; presumably it returns charCount
  // characters starting at start - TODO confirm.
  String value = input.SliceLength(start, charCount);
  TokenType type = tokenTypeEvaluation(value);
  this.Tokens.Add(new Token(type, value));

  return i + charCount + left;
}
|
|
|
|
|
|
|
|
|
|
// Reads an operator starting at i and adds it as an Operator token. The operator runs
// until an opening parenthesis, comma, period, double quote, whitespace or number.
// Returns the index of the last character consumed.
// Throws FormatException when the character at i is itself one of those terminators:
// nothing would be consumed, i - 1 would be returned, and the calling loop would
// revisit the same position forever while adding empty tokens.
// NOTE(review): a closing parenthesis and letters do not end an operator - confirm.
private Int32 ExtractOperator(String input, Int32 i) {
  Func<Char, Boolean> endsOperator = x => x == OpenFuncChar ||
                                          x == CommaChar ||
                                          x == PeriodChar ||
                                          x == StringQuotedChar ||
                                          Char.IsWhiteSpace(x) ||
                                          Char.IsNumber(x);

  if(endsOperator(input[i])) {
    throw new FormatException($"Parser error (Position {i}): Unexpected character '{input[i]}'.");
  }

  return this.ExtractData(input, i, x => TokenType.Operator, endsOperator);
}
|
|
|
|
|
|
|
|
|
|
// Reads a function or member name starting at i; the name ends at a parenthesis, a comma
// or whitespace. The token type comes from ResolveFunctionOrMemberType.
// Returns the value computed by ExtractData.
private Int32 ExtractFunctionOrMember(String input, Int32 i) {
  Boolean EndsName(Char c) {
    switch(c) {
      case OpenFuncChar:
      case CloseFuncChar:
      case CommaChar:
        return true;
      default:
        return Char.IsWhiteSpace(c);
    }
  }

  return this.ExtractData(input, i, this.ResolveFunctionOrMemberType, EndsName);
}
|
|
|
|
|
|
|
|
|
|
// Reads a number starting at i and adds it as a Number token. Digits, periods and minus
// signs are all accepted as part of the number; any other character ends it.
// Returns the value computed by ExtractData.
private Int32 ExtractNumber(String input, Int32 i) {
  Func<Char, Boolean> endsNumber = c => !(Char.IsNumber(c) || c == PeriodChar || c == NegativeChar);
  Func<String, TokenType> asNumber = text => TokenType.Number;

  return this.ExtractData(input, i, asNumber, endsNumber);
}
|
|
|
|
|
|
|
|
|
|
// Reads a double-quoted string whose opening quote is at i and adds its content (without
// the quotes) as a String token. Returns the index of the closing quote.
// Throws FormatException when the input ends before a closing quote is found.
private Int32 ExtractString(String input, Int32 i) {
  // right = 1 skips the opening quote; left = 1 makes the result point at the character
  // that stopped the scan, i.e. the closing quote.
  Int32 closingIndex = this.ExtractData(input, i, x => TokenType.String, x => x == StringQuotedChar, 1, 1);

  // A closing quote always lies inside the input, so an index at (or past) the end means
  // the scan ran out of characters. The previous check also required the last character
  // not to be a quote, which let an opening quote at the very end of the input pass as
  // an empty, terminated string.
  if(closingIndex < input.Length) {
    return closingIndex;
  }

  Char last = input[input.Length - 1];
  if(last == StringQuotedChar) {
    throw new FormatException($"Parser error (Position {i}): Expected '\"' but reached the end of the input.");
  }

  throw new FormatException($"Parser error (Position {i}): Expected '\"' but got '{last}'.");
}
|
|
|
|
|
|
|
|
|
|
// Name-based overload: looks both operators up (unknown names get precedence 0) and
// defers to the Operator-based comparison.
private Boolean CompareOperators(String op1, String op2) {
  Operator incoming = this.GetOperatorOrDefault(op1);
  Operator stacked = this.GetOperatorOrDefault(op2);

  return CompareOperators(incoming, stacked);
}
|
|
|
|
|
|
|
|
|
|
// Returns the first registered operator whose name equals op, or a new operator with
// that name and precedence 0 when none is registered.
private Operator GetOperatorOrDefault(String op) {
  foreach(Operator candidate in this._operators) {
    if(candidate.Name == op) {
      return candidate;
    }
  }

  return new Operator { Name = op, Precedence = 0 };
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Describes an operator: its textual name, its precedence and its associativity.
/// </summary>
public class Operator {
  /// <summary>
  /// Gets or sets the text of the operator as it appears in the input.
  /// </summary>
  /// <value>
  /// The name.
  /// </value>
  public String Name { get; set; }

  /// <summary>
  /// Gets or sets the precedence used when ordering operators.
  /// </summary>
  /// <value>
  /// The precedence.
  /// </value>
  public Int32 Precedence { get; set; }

  /// <summary>
  /// Gets or sets a value indicating whether the operator is right associative.
  /// </summary>
  /// <value>
  /// <c>true</c> if the operator is right associative; otherwise, <c>false</c>.
  /// </value>
  public Boolean RightAssociative { get; set; }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// A single token: its type and the text it was read from.
/// </summary>
public struct Token {
  /// <summary>
  /// Initializes a new instance of the <see cref="Token"/> struct.
  /// The value of <c>Function</c> and <c>Operator</c> tokens is stored in lower case
  /// (invariant culture); every other value is stored unchanged.
  /// </summary>
  /// <param name="type">The type.</param>
  /// <param name="value">The value.</param>
  public Token(TokenType type, String value) {
    this.Type = type;

    switch(type) {
      case TokenType.Function:
      case TokenType.Operator:
        this.Value = value.ToLowerInvariant();
        break;
      default:
        this.Value = value;
        break;
    }
  }

  /// <summary>
  /// Gets or sets the type of the token.
  /// </summary>
  /// <value>
  /// The type.
  /// </value>
  public TokenType Type { get; set; }

  /// <summary>
  /// Gets the text of the token as stored by the constructor.
  /// </summary>
  /// <value>
  /// The value.
  /// </value>
  public String Value { get; }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// The kinds of token a tokenizer can produce.
/// </summary>
public enum TokenType {
  /// <summary>
  /// A number.
  /// </summary>
  Number = 0,

  /// <summary>
  /// A string.
  /// </summary>
  String = 1,

  /// <summary>
  /// A variable.
  /// </summary>
  Variable = 2,

  /// <summary>
  /// A function.
  /// </summary>
  Function = 3,

  /// <summary>
  /// An opening or closing parenthesis.
  /// </summary>
  Parenthesis = 4,

  /// <summary>
  /// An operator.
  /// </summary>
  Operator = 5,

  /// <summary>
  /// A comma.
  /// </summary>
  Comma = 6,

  /// <summary>
  /// The wall, used to specify the end of the argument list of the following function.
  /// </summary>
  Wall = 7,
}
|
2019-02-17 14:08:57 +01:00
|
|
|
|
}
|