using System;
using System.Collections.Generic;
using System.Linq;

namespace Unosquare.Swan.Abstractions
{
    /// <summary>
    /// Represents a generic tokenizer.
    /// </summary>
    public abstract class Tokenizer
    {
        private const Char PeriodChar = '.';
        private const Char CommaChar = ',';
        private const Char StringQuotedChar = '"';
        private const Char OpenFuncChar = '(';
        private const Char CloseFuncChar = ')';
        private const Char NegativeChar = '-';

        private const String OpenFuncStr = "(";

        private readonly List<Operator> _operators = new List<Operator>();

        /// <summary>
        /// Initializes a new instance of the <see cref="Tokenizer"/> class.
        /// This constructor will use the following default operators:
        /// <list type="table">
        ///     <listheader>
        ///         <term>Operator</term>
        ///         <description>Precedence</description>
        ///     </listheader>
        ///     <item><term>=</term><description>1</description></item>
        ///     <item><term>!=</term><description>1</description></item>
        ///     <item><term>&gt;</term><description>2</description></item>
        ///     <item><term>&lt;</term><description>2</description></item>
        ///     <item><term>&gt;=</term><description>2</description></item>
        ///     <item><term>&lt;=</term><description>2</description></item>
        ///     <item><term>+</term><description>3</description></item>
        ///     <item><term>&amp;</term><description>3</description></item>
        ///     <item><term>-</term><description>3</description></item>
        ///     <item><term>*</term><description>4</description></item>
        ///     <item><term>(backslash)</term><description>4</description></item>
        ///     <item><term>/</term><description>4</description></item>
        ///     <item><term>^</term><description>4</description></item>
        /// </list>
        /// </summary>
        /// <param name="input">The input.</param>
        /// <exception cref="FormatException">
        /// The input contains an unterminated string or a character that cannot start any token.
        /// </exception>
        protected Tokenizer(String input)
        {
            // NOTE: these calls dispatch to virtual/abstract members, so overrides run
            // before the derived constructor body does.
            this._operators.AddRange(this.GetDefaultOperators());
            this.Tokenize(input);
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="Tokenizer"/> class.
        /// </summary>
        /// <param name="input">The input.</param>
        /// <param name="operators">The operators to use.</param>
        /// <exception cref="FormatException">
        /// The input contains an unterminated string or a character that cannot start any token.
        /// </exception>
        protected Tokenizer(String input, IEnumerable<Operator> operators)
        {
            this._operators.AddRange(operators);
            this.Tokenize(input);
        }

        /// <summary>
        /// Gets the tokens.
        /// </summary>
        /// <value>
        /// The tokens.
        /// </value>
        public List<Token> Tokens { get; } = new List<Token>();

        /// <summary>
        /// Validates the input and return the start index for tokenizer.
        /// </summary>
        /// <param name="input">The input.</param>
        /// <param name="startIndex">The start index.</param>
        /// <returns><c>true</c> if the input is valid, otherwise <c>false</c>.</returns>
        public abstract Boolean ValidateInput(String input, out Int32 startIndex);

        /// <summary>
        /// Resolves the type of the function or member.
        /// </summary>
        /// <param name="input">The input.</param>
        /// <returns>The token type.</returns>
        public abstract TokenType ResolveFunctionOrMemberType(String input);

        /// <summary>
        /// Evaluates the function or member.
        /// </summary>
        /// <param name="input">The input.</param>
        /// <param name="position">The position.</param>
        /// <returns><c>true</c> if the input is a valid function or variable, otherwise <c>false</c>.</returns>
        public virtual Boolean EvaluateFunctionOrMember(String input, Int32 position) => false;

        /// <summary>
        /// Gets the default operators.
        /// </summary>
        /// <returns>An array with the operators to use for the tokenizer.</returns>
        public virtual Operator[] GetDefaultOperators() => new[]
        {
            new Operator { Name = "=", Precedence = 1 },
            new Operator { Name = "!=", Precedence = 1 },
            new Operator { Name = ">", Precedence = 2 },
            new Operator { Name = "<", Precedence = 2 },
            new Operator { Name = ">=", Precedence = 2 },
            new Operator { Name = "<=", Precedence = 2 },
            new Operator { Name = "+", Precedence = 3 },
            new Operator { Name = "&", Precedence = 3 },
            new Operator { Name = "-", Precedence = 3 },
            new Operator { Name = "*", Precedence = 4 },
            new Operator { Name = "/", Precedence = 4 },
            new Operator { Name = "\\", Precedence = 4 },
            new Operator { Name = "^", Precedence = 4 },
        };

        /// <summary>
        /// Shunting the yard: reorders <see cref="Tokens"/> using an operator/function stack.
        /// </summary>
        /// <param name="includeFunctionStopper">
        /// if set to <c>true</c> [include function stopper] (Token type <c>Wall</c>), emitted when
        /// the opening parenthesis that follows a function is reached.
        /// </param>
        /// <returns>
        /// Enumerable of the tokens in the reordered sequence.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Wrong token
        /// or
        /// Mismatched parenthesis.
        /// </exception>
        public virtual IEnumerable<Token> ShuntingYard(Boolean includeFunctionStopper = true)
        {
            Stack<Token> stack = new Stack<Token>();

            foreach (Token tok in this.Tokens)
            {
                switch (tok.Type)
                {
                    case TokenType.Number:
                    case TokenType.Variable:
                    case TokenType.String:
                        // Operands go straight to the output.
                        yield return tok;
                        break;

                    case TokenType.Function:
                        stack.Push(tok);
                        break;

                    case TokenType.Operator:
                        // Flush stacked operators for as long as CompareOperators says so.
                        while (stack.Any()
                               && stack.Peek().Type == TokenType.Operator
                               && this.CompareOperators(tok.Value, stack.Peek().Value))
                        {
                            yield return stack.Pop();
                        }

                        stack.Push(tok);
                        break;

                    case TokenType.Comma:
                        // Flush everything belonging to the argument that just ended.
                        while (stack.Any()
                               && stack.Peek().Type != TokenType.Comma
                               && stack.Peek().Type != TokenType.Parenthesis)
                        {
                            yield return stack.Pop();
                        }

                        break;

                    case TokenType.Parenthesis:
                        if (tok.Value == OpenFuncStr)
                        {
                            Boolean opensFunctionCall = stack.Any() && stack.Peek().Type == TokenType.Function;
                            if (opensFunctionCall && includeFunctionStopper)
                            {
                                yield return new Token(TokenType.Wall, tok.Value);
                            }

                            stack.Push(tok);
                        }
                        else
                        {
                            while (stack.Any() && stack.Peek().Value != OpenFuncStr)
                            {
                                yield return stack.Pop();
                            }

                            // A closing parenthesis without a matching opening one used to fail
                            // inside Stack.Peek with a generic "stack empty" message.
                            if (!stack.Any())
                            {
                                throw new InvalidOperationException("Mismatched parenthesis");
                            }

                            // Discard the opening parenthesis itself.
                            _ = stack.Pop();

                            if (stack.Any() && stack.Peek().Type == TokenType.Function)
                            {
                                yield return stack.Pop();
                            }
                        }

                        break;

                    default:
                        throw new InvalidOperationException("Wrong token");
                }
            }

            while (stack.Any())
            {
                Token remaining = stack.Pop();
                if (remaining.Type == TokenType.Parenthesis)
                {
                    throw new InvalidOperationException("Mismatched parenthesis");
                }

                yield return remaining;
            }
        }

        /// <summary>
        /// Compares the precedence of two operators: strictly lower for a right-associative
        /// <paramref name="op1"/>, lower or equal otherwise.
        /// </summary>
        private static Boolean CompareOperators(Operator op1, Operator op2) =>
            op1.RightAssociative
                ? op1.Precedence < op2.Precedence
                : op1.Precedence <= op2.Precedence;

        /// <summary>
        /// Splits <paramref name="input"/> into <see cref="Tokens"/>, starting at the index
        /// reported by <see cref="ValidateInput"/>. Does nothing when validation fails.
        /// </summary>
        /// <exception cref="FormatException">
        /// An unterminated string or a character that cannot start any token was found.
        /// </exception>
        private void Tokenize(String input)
        {
            if (!this.ValidateInput(input, out Int32 startIndex))
            {
                return;
            }

            for (Int32 i = startIndex; i < input.Length; i++)
            {
                Char current = input[i];

                if (Char.IsWhiteSpace(input, i))
                {
                    continue;
                }

                if (current == CommaChar)
                {
                    this.Tokens.Add(new Token(TokenType.Comma, current.ToString()));
                    continue;
                }

                if (current == StringQuotedChar)
                {
                    i = this.ExtractString(input, i);
                    continue;
                }

                if (Char.IsLetter(input, i) || this.EvaluateFunctionOrMember(input, i))
                {
                    i = this.ExtractFunctionOrMember(input, i);
                    continue;
                }

                // A '-' starts a number unless the previous token is already a number.
                Boolean previousIsNumber = this.Tokens.Count > 0
                                           && this.Tokens[this.Tokens.Count - 1].Type == TokenType.Number;
                if (Char.IsNumber(input, i) || (current == NegativeChar && !previousIsNumber))
                {
                    i = this.ExtractNumber(input, i);
                    continue;
                }

                if (current == OpenFuncChar || current == CloseFuncChar)
                {
                    this.Tokens.Add(new Token(TokenType.Parenthesis, current.ToString()));
                    continue;
                }

                i = this.ExtractOperator(input, i);
            }
        }

        /// <summary>
        /// Reads characters from <c>i + right</c> until <paramref name="evaluation"/> returns
        /// <c>true</c> or the input ends, adds the resulting token and returns the index the
        /// caller's loop should continue from (<c>i + consumed + left</c>).
        /// </summary>
        /// <exception cref="FormatException">
        /// Nothing was consumed and the returned index would move backwards, which would make
        /// the tokenizer loop re-read the same character forever.
        /// </exception>
        private Int32 ExtractData(
            String input,
            Int32 i,
            Func<String, TokenType> tokenTypeEvaluation,
            Func<Char, Boolean> evaluation,
            Int32 right = 0,
            Int32 left = -1)
        {
            Int32 charCount = 0;
            for (Int32 j = i + right; j < input.Length; j++)
            {
                if (evaluation(input[j]))
                {
                    break;
                }

                charCount++;
            }

            if (charCount + left < 0)
            {
                throw new FormatException($"Parser error (Position {i}): Unexpected character '{input[i]}'.");
            }

            // Extract and set the value
            String value = input.SliceLength(i + right, charCount);
            this.Tokens.Add(new Token(tokenTypeEvaluation(value), value));

            return i + charCount + left;
        }

        /// <summary>
        /// Extracts an operator token; it ends at '(', ',', '.', '"', whitespace or a number.
        /// </summary>
        private Int32 ExtractOperator(String input, Int32 i) =>
            this.ExtractData(
                input,
                i,
                x => TokenType.Operator,
                x => x == OpenFuncChar
                     || x == CommaChar
                     || x == PeriodChar
                     || x == StringQuotedChar
                     || Char.IsWhiteSpace(x)
                     || Char.IsNumber(x));

        /// <summary>
        /// Extracts a function or member token; it ends at '(', ')', ',' or whitespace and its
        /// type is decided by <see cref="ResolveFunctionOrMemberType"/>.
        /// </summary>
        private Int32 ExtractFunctionOrMember(String input, Int32 i) =>
            this.ExtractData(
                input,
                i,
                this.ResolveFunctionOrMemberType,
                x => x == OpenFuncChar
                     || x == CloseFuncChar
                     || x == CommaChar
                     || Char.IsWhiteSpace(x));

        /// <summary>
        /// Extracts a number token; it continues over numeric characters, '.' and '-'.
        /// </summary>
        private Int32 ExtractNumber(String input, Int32 i) =>
            this.ExtractData(
                input,
                i,
                x => TokenType.Number,
                x => !Char.IsNumber(x) && x != PeriodChar && x != NegativeChar);

        /// <summary>
        /// Extracts a quoted string token starting at the opening quote at index
        /// <paramref name="i"/> and returns the index of the closing quote.
        /// </summary>
        /// <exception cref="FormatException">
        /// The end of the input was reached and its last character is not a quote.
        /// </exception>
        private Int32 ExtractString(String input, Int32 i)
        {
            Int32 length = this.ExtractData(input, i, x => TokenType.String, x => x == StringQuotedChar, 1, 1);

            // open string, report issue
            if (length == input.Length && input[length - 1] != StringQuotedChar)
            {
                throw new FormatException($"Parser error (Position {i}): Expected '\"' but got '{input[length - 1]}'.");
            }

            return length;
        }

        /// <summary>
        /// Compares two operators by name, resolving each through the configured operator list.
        /// </summary>
        private Boolean CompareOperators(String op1, String op2) =>
            CompareOperators(this.GetOperatorOrDefault(op1), this.GetOperatorOrDefault(op2));

        /// <summary>
        /// Finds the configured operator named <paramref name="op"/>; unknown names get precedence 0.
        /// </summary>
        private Operator GetOperatorOrDefault(String op) =>
            this._operators.FirstOrDefault(x => x.Name == op) ?? new Operator { Name = op, Precedence = 0 };
    }

    /// <summary>
    /// Represents an operator with precedence.
    /// </summary>
    public class Operator
    {
        /// <summary>
        /// Gets or sets the name.
        /// </summary>
        /// <value>
        /// The name.
        /// </value>
        public String Name { get; set; }

        /// <summary>
        /// Gets or sets the precedence.
        /// </summary>
        /// <value>
        /// The precedence.
        /// </value>
        public Int32 Precedence { get; set; }

        /// <summary>
        /// Gets or sets a value indicating whether [right associative].
        /// </summary>
        /// <value>
        ///   <c>true</c> if [right associative]; otherwise, <c>false</c>.
        /// </value>
        public Boolean RightAssociative { get; set; }
    }

    /// <summary>
    /// Represents a Token structure.
    /// </summary>
    public struct Token
    {
        /// <summary>
        /// Initializes a new instance of the <see cref="Token"/> struct.
        /// Function and operator values are stored lower-cased (invariant culture).
        /// </summary>
        /// <param name="type">The type.</param>
        /// <param name="value">The value.</param>
        public Token(TokenType type, String value)
        {
            this.Type = type;
            this.Value = type == TokenType.Function || type == TokenType.Operator
                ? value.ToLowerInvariant()
                : value;
        }

        /// <summary>
        /// Gets or sets the type.
        /// </summary>
        /// <value>
        /// The type.
        /// </value>
        public TokenType Type { get; set; }

        /// <summary>
        /// Gets the value.
        /// </summary>
        /// <value>
        /// The value.
        /// </value>
        public String Value { get; }
    }

    /// <summary>
    /// Enums the token types.
    /// </summary>
    public enum TokenType
    {
        /// <summary>
        /// The number
        /// </summary>
        Number,

        /// <summary>
        /// The string
        /// </summary>
        String,

        /// <summary>
        /// The variable
        /// </summary>
        Variable,

        /// <summary>
        /// The function
        /// </summary>
        Function,

        /// <summary>
        /// The parenthesis
        /// </summary>
        Parenthesis,

        /// <summary>
        /// The operator
        /// </summary>
        Operator,

        /// <summary>
        /// The comma
        /// </summary>
        Comma,

        /// <summary>
        /// The wall, used to specify the end of the argument list of the following function
        /// </summary>
        Wall,
    }
}