namespace Unosquare.Swan.Abstractions
{
    using System;
    using System.Collections.Generic;
    using System.Linq;

    /// <summary>
    /// Represents a generic tokenizer.
    /// </summary>
    public abstract class Tokenizer
    {
        private const char PeriodChar = '.';
        private const char CommaChar = ',';
        private const char StringQuotedChar = '"';
        private const char OpenFuncChar = '(';
        private const char CloseFuncChar = ')';
        private const char NegativeChar = '-';

        private const string OpenFuncStr = "(";

        private readonly List<Operator> _operators = new List<Operator>();

        /// <summary>
        /// Initializes a new instance of the <see cref="Tokenizer"/> class.
        /// This constructor will use the following default operators:
        ///
        /// <list type="table">
        ///     <listheader><term>Operator</term><description>Precedence</description></listheader>
        ///     <item><term>=</term><description>1</description></item>
        ///     <item><term>!=</term><description>1</description></item>
        ///     <item><term>&gt;</term><description>2</description></item>
        ///     <item><term>&lt;</term><description>2</description></item>
        ///     <item><term>&gt;=</term><description>2</description></item>
        ///     <item><term>&lt;=</term><description>2</description></item>
        ///     <item><term>+</term><description>3</description></item>
        ///     <item><term>&amp;</term><description>3</description></item>
        ///     <item><term>-</term><description>3</description></item>
        ///     <item><term>*</term><description>4</description></item>
        ///     <item><term>\ (backslash)</term><description>4</description></item>
        ///     <item><term>/</term><description>4</description></item>
        ///     <item><term>^</term><description>4</description></item>
        /// </list>
        /// </summary>
        /// <remarks>
        /// The input is tokenized inside the constructor, which calls the virtual/abstract members
        /// <see cref="GetDefaultOperators"/>, <see cref="ValidateInput"/>,
        /// <see cref="EvaluateFunctionOrMember"/> and <see cref="ResolveFunctionOrMemberType"/>.
        /// Overrides therefore run before the derived class constructor body.
        /// </remarks>
        /// <param name="input">The input.</param>
        protected Tokenizer(string input)
        {
            _operators.AddRange(GetDefaultOperators());
            Tokenize(input);
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="Tokenizer"/> class.
        /// </summary>
        /// <remarks>
        /// The input is tokenized inside the constructor, so the overridable members used by the
        /// tokenizer run before the derived class constructor body.
        /// </remarks>
        /// <param name="input">The input.</param>
        /// <param name="operators">The operators to use.</param>
        protected Tokenizer(string input, IEnumerable<Operator> operators)
        {
            _operators.AddRange(operators);
            Tokenize(input);
        }

        /// <summary>
        /// Gets the tokens.
        /// </summary>
        /// <value>
        /// The tokens, in the order they were read from the input.
        /// </value>
        public List<Token> Tokens { get; } = new List<Token>();

        /// <summary>
        /// Validates the input and return the start index for tokenizer.
        /// </summary>
        /// <param name="input">The input.</param>
        /// <param name="startIndex">The start index.</param>
        /// <returns><c>true</c> if the input is valid, otherwise <c>false</c>.</returns>
        public abstract bool ValidateInput(string input, out int startIndex);

        /// <summary>
        /// Resolves the type of the function or member.
        /// </summary>
        /// <param name="input">The input.</param>
        /// <returns>The token type.</returns>
        public abstract TokenType ResolveFunctionOrMemberType(string input);

        /// <summary>
        /// Evaluates the function or member.
        /// </summary>
        /// <param name="input">The input.</param>
        /// <param name="position">The position.</param>
        /// <returns><c>true</c> if the input is a valid function or variable, otherwise <c>false</c>.</returns>
        public virtual bool EvaluateFunctionOrMember(string input, int position) => false;

        /// <summary>
        /// Gets the default operators.
        /// </summary>
        /// <returns>An array with the operators to use for the tokenizer.</returns>
        public virtual Operator[] GetDefaultOperators() => new[]
        {
            new Operator { Name = "=", Precedence = 1 },
            new Operator { Name = "!=", Precedence = 1 },
            new Operator { Name = ">", Precedence = 2 },
            new Operator { Name = "<", Precedence = 2 },
            new Operator { Name = ">=", Precedence = 2 },
            new Operator { Name = "<=", Precedence = 2 },
            new Operator { Name = "+", Precedence = 3 },
            new Operator { Name = "&", Precedence = 3 },
            new Operator { Name = "-", Precedence = 3 },
            new Operator { Name = "*", Precedence = 4 },
            new Operator { Name = "/", Precedence = 4 },
            new Operator { Name = "\\", Precedence = 4 },
            new Operator { Name = "^", Precedence = 4 },
        };

        /// <summary>
        /// Reorders <see cref="Tokens"/> with the shunting-yard algorithm.
        /// </summary>
        /// <remarks>
        /// This is an iterator: tokens are produced, and exceptions are thrown, only while the
        /// returned sequence is being enumerated.
        /// </remarks>
        /// <param name="includeFunctionStopper">
        /// If set to <c>true</c>, a <see cref="TokenType.Wall"/> token is emitted when the opening
        /// parenthesis of a function call is reached.
        /// </param>
        /// <returns>
        /// The reordered tokens: numbers, variables and strings as soon as they are read,
        /// operators and functions when they are popped from the internal stack.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Wrong token
        /// or
        /// Mismatched parenthesis.
        /// </exception>
        public virtual IEnumerable<Token> ShuntingYard(bool includeFunctionStopper = true)
        {
            var stack = new Stack<Token>();

            foreach (var tok in Tokens)
            {
                switch (tok.Type)
                {
                    case TokenType.Number:
                    case TokenType.Variable:
                    case TokenType.String:
                        yield return tok;
                        break;

                    case TokenType.Function:
                        stack.Push(tok);
                        break;

                    case TokenType.Operator:
                        // Flush operators on the stack that must be applied before the incoming one.
                        while (stack.Count > 0
                               && stack.Peek().Type == TokenType.Operator
                               && CompareOperators(tok.Value, stack.Peek().Value))
                        {
                            yield return stack.Pop();
                        }

                        stack.Push(tok);
                        break;

                    case TokenType.Comma:
                        // Flush the argument that just ended; the comma itself is neither
                        // emitted nor pushed.
                        while (stack.Count > 0
                               && stack.Peek().Type != TokenType.Comma
                               && stack.Peek().Type != TokenType.Parenthesis)
                        {
                            yield return stack.Pop();
                        }

                        break;

                    case TokenType.Parenthesis:
                        if (tok.Value == OpenFuncStr)
                        {
                            var opensFunctionCall = stack.Count > 0 && stack.Peek().Type == TokenType.Function;

                            if (opensFunctionCall && includeFunctionStopper)
                            {
                                yield return new Token(TokenType.Wall, tok.Value);
                            }

                            stack.Push(tok);
                        }
                        else
                        {
                            while (stack.Count > 0 && stack.Peek().Value != OpenFuncStr)
                            {
                                yield return stack.Pop();
                            }

                            // A closing parenthesis without a matching opener used to surface as
                            // the generic empty-stack error of Stack.Peek; report it explicitly.
                            if (stack.Count == 0)
                            {
                                throw new InvalidOperationException("Mismatched parenthesis");
                            }

                            // Discard the opening parenthesis.
                            stack.Pop();

                            if (stack.Count > 0 && stack.Peek().Type == TokenType.Function)
                            {
                                yield return stack.Pop();
                            }
                        }

                        break;

                    default:
                        throw new InvalidOperationException("Wrong token");
                }
            }

            while (stack.Count > 0)
            {
                var tok = stack.Pop();

                // Any parenthesis still on the stack was never closed.
                if (tok.Type == TokenType.Parenthesis)
                {
                    throw new InvalidOperationException("Mismatched parenthesis");
                }

                yield return tok;
            }
        }

        /// <summary>
        /// Decides whether the operator on top of the stack (<paramref name="op2"/>) has to be
        /// emitted before the incoming operator (<paramref name="op1"/>) is pushed.
        /// </summary>
        /// <param name="op1">The incoming operator.</param>
        /// <param name="op2">The operator on top of the stack.</param>
        /// <returns>
        /// <c>true</c> when <paramref name="op1"/> has lower precedence than <paramref name="op2"/>,
        /// or equal precedence and <paramref name="op1"/> is not right associative.
        /// </returns>
        private static bool CompareOperators(Operator op1, Operator op2) => op1.RightAssociative
            ? op1.Precedence < op2.Precedence
            : op1.Precedence <= op2.Precedence;

        /// <summary>
        /// Splits the input into tokens and appends them to <see cref="Tokens"/>.
        /// Nothing is added when <see cref="ValidateInput"/> rejects the input.
        /// </summary>
        /// <param name="input">The input.</param>
        private void Tokenize(string input)
        {
            if (!ValidateInput(input, out var startIndex))
            {
                return;
            }

            // Each Extract* helper returns the index of the last character it consumed,
            // so the loop increment moves on to the next unread character.
            for (var i = startIndex; i < input.Length; i++)
            {
                if (char.IsWhiteSpace(input, i))
                {
                    continue;
                }

                if (input[i] == CommaChar)
                {
                    Tokens.Add(new Token(TokenType.Comma, input[i].ToString()));
                    continue;
                }

                if (input[i] == StringQuotedChar)
                {
                    i = ExtractString(input, i);
                    continue;
                }

                if (char.IsLetter(input, i) || EvaluateFunctionOrMember(input, i))
                {
                    i = ExtractFunctionOrMember(input, i);
                    continue;
                }

                // A '-' starts a number literal unless the previous token is a Number.
                // NOTE(review): after a variable or a closing parenthesis the '-' is therefore
                // read as a sign rather than as the subtraction operator - confirm this is intended.
                var minusStartsNumber = input[i] == NegativeChar
                    && (Tokens.Count == 0 || Tokens[Tokens.Count - 1].Type != TokenType.Number);

                if (char.IsNumber(input, i) || minusStartsNumber)
                {
                    i = ExtractNumber(input, i);
                    continue;
                }

                if (input[i] == OpenFuncChar || input[i] == CloseFuncChar)
                {
                    Tokens.Add(new Token(TokenType.Parenthesis, input[i].ToString()));
                    continue;
                }

                i = ExtractOperator(input, i);
            }
        }

        /// <summary>
        /// Reads characters until <paramref name="evaluation"/> matches or the input ends, and adds
        /// the text read as a new token.
        /// </summary>
        /// <param name="input">The input.</param>
        /// <param name="i">The index the token starts at.</param>
        /// <param name="tokenTypeEvaluation">Maps the extracted text to its token type.</param>
        /// <param name="evaluation">Returns <c>true</c> for the character that ends the token.</param>
        /// <param name="right">Number of leading characters to skip (1 for an opening quote).</param>
        /// <param name="left">
        /// Offset added to the returned index; the default of -1 yields the index of the last
        /// character of the token.
        /// </param>
        /// <param name="minLength">
        /// Number of characters consumed before <paramref name="evaluation"/> is consulted.
        /// </param>
        /// <returns>The index the caller should continue from (before its own increment).</returns>
        private int ExtractData(
            string input,
            int i,
            Func<string, TokenType> tokenTypeEvaluation,
            Func<char, bool> evaluation,
            int right = 0,
            int left = -1,
            int minLength = 0)
        {
            var charCount = 0;

            for (var j = i + right; j < input.Length; j++)
            {
                if (charCount >= minLength && evaluation(input[j]))
                {
                    break;
                }

                charCount++;
            }

            // Extract and set the value
            var value = input.SliceLength(i + right, charCount);
            Tokens.Add(new Token(tokenTypeEvaluation(value), value));

            return i + charCount + left;
        }

        /// <summary>
        /// Extracts an operator: everything up to the next opening parenthesis, comma, period,
        /// quote, white space or number.
        /// </summary>
        /// <param name="input">The input.</param>
        /// <param name="i">The index the operator starts at.</param>
        /// <returns>The index of the last character of the operator.</returns>
        private int ExtractOperator(string input, int i) =>
            ExtractData(
                input,
                i,
                x => TokenType.Operator,
                x => x == OpenFuncChar ||
                     x == CommaChar ||
                     x == PeriodChar ||
                     x == StringQuotedChar ||
                     char.IsWhiteSpace(x) ||
                     char.IsNumber(x),
                // Always consume the first character. A stray '.' is itself a terminator, so
                // without this nothing was consumed, the returned index was i - 1 and Tokenize
                // looped forever on the same position.
                minLength: 1);

        /// <summary>
        /// Extracts a function or member name: everything up to the next parenthesis, comma or
        /// white space. The token type is decided by <see cref="ResolveFunctionOrMemberType"/>.
        /// </summary>
        /// <param name="input">The input.</param>
        /// <param name="i">The index the name starts at.</param>
        /// <returns>The index of the last character of the name.</returns>
        private int ExtractFunctionOrMember(string input, int i) =>
            ExtractData(
                input,
                i,
                ResolveFunctionOrMemberType,
                x => x == OpenFuncChar ||
                     x == CloseFuncChar ||
                     x == CommaChar ||
                     char.IsWhiteSpace(x));

        /// <summary>
        /// Extracts a number: a run of numeric characters, periods and minus signs.
        /// </summary>
        /// <remarks>
        /// NOTE(review): '-' and '.' are accepted anywhere in the run, so <c>1-2</c> written
        /// without spaces is extracted as a single Number token - confirm this is intended.
        /// </remarks>
        /// <param name="input">The input.</param>
        /// <param name="i">The index the number starts at.</param>
        /// <returns>The index of the last character of the number.</returns>
        private int ExtractNumber(string input, int i) =>
            ExtractData(
                input,
                i,
                x => TokenType.Number,
                x => !char.IsNumber(x) && x != PeriodChar && x != NegativeChar);

        /// <summary>
        /// Extracts a double-quoted string; the quotes are not part of the token value.
        /// </summary>
        /// <param name="input">The input.</param>
        /// <param name="i">The index of the opening quote.</param>
        /// <returns>The index of the closing quote.</returns>
        /// <exception cref="FormatException">The string is not closed before the input ends.</exception>
        private int ExtractString(string input, int i)
        {
            var closingIndex = ExtractData(
                input,
                i,
                x => TokenType.String,
                x => x == StringQuotedChar,
                right: 1,
                left: 1);

            // For a closed string this is the index of the closing quote, which is always inside
            // the input. Reaching the end of the input means no closing quote was found. The
            // previous extra test on the last character missed an opening quote that is itself
            // the last character of the input.
            if (closingIndex >= input.Length)
            {
                throw new FormatException(
                    $"Parser error (Position {i}): Expected '\"' but got '{input[input.Length - 1]}'.");
            }

            return closingIndex;
        }

        /// <summary>
        /// Compares two operators given by name; see the <see cref="Operator"/> based overload.
        /// </summary>
        /// <param name="op1">The name of the incoming operator.</param>
        /// <param name="op2">The name of the operator on top of the stack.</param>
        /// <returns><c>true</c> if <paramref name="op2"/> has to be emitted first.</returns>
        private bool CompareOperators(string op1, string op2) =>
            CompareOperators(GetOperatorOrDefault(op1), GetOperatorOrDefault(op2));

        /// <summary>
        /// Finds the configured operator with the given name.
        /// </summary>
        /// <param name="op">The operator name.</param>
        /// <returns>
        /// The first configured operator named <paramref name="op"/>, or a new operator with
        /// that name and precedence 0 when none is configured.
        /// </returns>
        private Operator GetOperatorOrDefault(string op) =>
            _operators.FirstOrDefault(x => x.Name == op) ?? new Operator { Name = op, Precedence = 0 };
    }

    /// <summary>
    /// Represents an operator with precedence.
    /// </summary>
    public class Operator
    {
        /// <summary>
        /// Gets or sets the name.
        /// </summary>
        /// <value>
        /// The name.
        /// </value>
        public string Name { get; set; }

        /// <summary>
        /// Gets or sets the precedence.
        /// </summary>
        /// <value>
        /// The precedence.
        /// </value>
        public int Precedence { get; set; }

        /// <summary>
        /// Gets or sets a value indicating whether [right associative].
        /// </summary>
        /// <value>
        ///   <c>true</c> if [right associative]; otherwise, <c>false</c>.
        /// </value>
        public bool RightAssociative { get; set; }
    }

    /// <summary>
    /// Represents a Token structure.
    /// </summary>
    public struct Token
    {
        /// <summary>
        /// Initializes a new instance of the <see cref="Token"/> struct.
        /// Values of <see cref="TokenType.Function"/> and <see cref="TokenType.Operator"/> tokens
        /// are stored lower-cased (invariant culture); all other values are stored unchanged.
        /// </summary>
        /// <param name="type">The type.</param>
        /// <param name="value">The value.</param>
        public Token(TokenType type, string value)
        {
            Type = type;
            Value = type == TokenType.Function || type == TokenType.Operator
                ? value.ToLowerInvariant()
                : value;
        }

        /// <summary>
        /// Gets or sets the type.
        /// </summary>
        /// <value>
        /// The type.
        /// </value>
        public TokenType Type { get; set; }

        /// <summary>
        /// Gets the value.
        /// </summary>
        /// <value>
        /// The value.
        /// </value>
        public string Value { get; }
    }

    /// <summary>
    /// Enums the token types.
    /// </summary>
    public enum TokenType
    {
        /// <summary>
        /// The number
        /// </summary>
        Number,

        /// <summary>
        /// The string
        /// </summary>
        String,

        /// <summary>
        /// The variable
        /// </summary>
        Variable,

        /// <summary>
        /// The function
        /// </summary>
        Function,

        /// <summary>
        /// The parenthesis
        /// </summary>
        Parenthesis,

        /// <summary>
        /// The operator
        /// </summary>
        Operator,

        /// <summary>
        /// The comma
        /// </summary>
        Comma,

        /// <summary>
        /// The wall, used to specify the end of the argument list of the following function
        /// </summary>
        Wall,
    }
}