For some reason, the Reflection namespace in .Net is one of my favorites. Particularly interesting is the Reflection.Emit namespace. If your not familiar, it’s a collection of classes that allows you to create new assemblies, classes, and methods at run time. The DynamicMethod class gives you a way to generate a method and create a delegate to call it. This requires some knowledge of MSIL, an assembly style language that .Net languages compile to before they are converted to true machine language. Since I was bored this week at work, I combined this with a something I learned from back in my college days. Recursive Descent Parsing is a method often used for compiling expressions into assembly style instructions.
The use case for this is simple, you have some need for users to input simple numerical formula into your system, and quickly execute those functions against various inputs and store the results. The following code takes a standard math expression, combined with variables and functions, and compiles it into a dynamic method that can be executed. Variable values and function calls are handled by a callback function that receives the name of the symbol and any arguments supplied.
The most difficult part of working with IL is generating valid code. The most common error messages are usually ‘This operation could destabilize the runtime.’ or ‘An invalid program was detected.’ Neither of which is very helpful. The most common way trouble shoot IL generation is to write the code you want to simulate in C# or VB, compile it, and use the ILDASM disassembly tool that comes with Visual Studio to examine the code. All I can suggest is to look at your code carefully and be sure of the state of the stack, and what type all your values are at each point. I attempted to comment most of the code below in the IL generation portion so you can follow what I was doing.
One last note about the below code, I wrote it quick and didn’t put any verification into the parser. That is, it will likely accept badly formed expressions, generating invalid code or throwing meaningless errors. I’ll leave adding proper error messages to the parsing section an exercise for the reader. Have fun.
using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Reflection; using System.Reflection.Emit; /* * This is a simple example of a recursive descent parser combined * with dynamic methods to Compile simple expressions stored in a string. * This example lacks error feedback for invalid expressions (invalid expressions may generate invalid code) * This example handles limited numeric types only (would need additional type conversion logic added to handle strings etc.). */ namespace ExpressionEvaluatorExample { class Program { static void Main(string[] args) { //Callback delegate for resolving Variables and functions Func<string, Array, object > resolve = (String name, Array arg) => { switch (name) { case "Var1": return 5; case "Var2": return 6; case "Pow": return System.Math.Pow((double)arg.GetValue(0), (double)arg.GetValue(1)); } return 0; }; var FE = new functionEvaluator(); FE.Tokenize("Pow(4, 2)+Var2*(10+5)"); //106 var fx = FE.Compile(); Console.WriteLine("Result: " + fx(resolve).ToString()); Console.ReadLine(); } } class functionEvaluator { /* Supported Grammar * Sum = Mul [+|- Mul] * Mul = Factor [*|/ Factor] * Factor = Number | (Sum) */ private List<string> _tokens; private int index = 0; private string token { get { if(index < _tokens.Count ) return _tokens[index]; else return null; } } private string peek() { if (index < _tokens.Count) return _tokens[index+1]; else return null; } private bool next() { if (index < _tokens.Count) { index++; return true; } else return false; } public void Tokenize(string expr) { //Tokenize regex adapted from: http://www.codeproject.com/Tips/371647/Converting-InFix-to-PostFix-by-Recursive-Descenden string syntax = @"\s*(\d+|'.*?'|[-+*/%(),]|[^\s-+*/%(),]+)\s*"; _tokens = Regex.Matches(expr, syntax, RegexOptions.Compiled) .Cast<Match>().Select(t => t.Groups[1].Value.Trim()).ToList() ; } private ILGenerator ilgen; public Func<Func<string, Array, object>, object> Compile() { var method = new DynamicMethod("calculate", typeof(object), new Type[] { typeof(Func<string, Array, object>) }); ilgen = method.GetILGenerator(); Sum(); ilgen.Emit(OpCodes.Box, typeof(double)); //box the return type ilgen.Emit(OpCodes.Ret); //return return (Func<Func<string, Array, object>, object>)method.CreateDelegate(typeof(Func<Func<string, Array, object>, object>)); } public void Sum() { Mul(); if (token == "+") { next(); Mul(); ilgen.Emit(OpCodes.Add); } else if (token == "-") { next(); Mul(); ilgen.Emit(OpCodes.Sub); } } public void Mul() { Factor(); if (token == "*") { next(); Factor(); ilgen.Emit(OpCodes.Mul); } else if (token == "/") { next(); Factor(); ilgen.Emit(OpCodes.Div); } } public void Factor() { if (token == "(") { next(); Sum(); } else { Symbol(); } } public void Symbol() { double value; if (double.TryParse(token, out value)) { ilgen.Emit(OpCodes.Ldc_R8, value); //load numeric literal to the stack as a double next(); } else { ilgen.Emit(OpCodes.Ldarg_0); //load first argument (pointer to delegate) ilgen.Emit(OpCodes.Ldstr, token); //load token name to stack if (peek() == "(") { //function arguments next(); int args = Arguments(); if (args > 0) { var arry = ilgen.DeclareLocal(typeof(Array )); var argval = ilgen.DeclareLocal(typeof(double)); ilgen.Emit(OpCodes.Ldtoken, typeof(object)); //load type token ilgen.Emit(OpCodes.Call, typeof(System.Type).GetMethod("GetTypeFromHandle")); //get type from handle ilgen.Emit(OpCodes.Ldc_I4 , args); //load number of elements ilgen.Emit(OpCodes.Call, typeof(Array).GetMethod("CreateInstance", new Type[] { typeof(Type), typeof(int) })); //call array object factory ilgen.Emit(OpCodes.Stloc, arry.LocalIndex); // store array in local for(int i=0;i<args;i++) { ilgen.Emit(OpCodes.Stloc, argval.LocalIndex ); //pop argument from stack ilgen.Emit(OpCodes.Ldloc, arry.LocalIndex); //load array on stack ilgen.Emit(OpCodes.Castclass, typeof(object[])); //cast to object array ilgen.Emit(OpCodes.Ldc_I4, i); //load index on stack ilgen.Emit(OpCodes.Ldloc, argval.LocalIndex ); //load local back on stack ilgen.Emit(OpCodes.Box, typeof(double)); //box value ilgen.Emit(OpCodes.Stelem_Ref); //store in array } ilgen.Emit(OpCodes.Ldloc, arry.LocalIndex); // push array to stack } else { ilgen.Emit(OpCodes.Ldnull); //load null reference } } else { ilgen.Emit(OpCodes.Ldnull); //load null reference } //call the delegate ilgen.Emit(OpCodes.Callvirt, typeof(Func<string, Array, object>).GetMethod("Invoke", new Type[] {typeof(string), typeof(object[])})); //store result in local var var retval = ilgen.DeclareLocal(typeof(object)); ilgen.Emit(OpCodes.Stloc, retval); // store local ilgen.Emit(OpCodes.Ldloc, retval); // put it back on the stack var intToFloat = ilgen.DefineLabel(); // Label for branching var done = ilgen.DefineLabel(); // Label for branching //test type, branch to conversion ilgen.Emit(OpCodes.Isinst, typeof(Int32 )); //Test if boxed type is integer ilgen.Emit(OpCodes.Brtrue, intToFloat); //Branch to int32 conversion if true //case else ilgen.Emit(OpCodes.Ldloc, retval); //load from local ilgen.Emit(OpCodes.Unbox_Any, typeof(double)); //unbox double ilgen.Emit(OpCodes.Br, done); //branch to end of block //convert int32 to fload ilgen.MarkLabel(intToFloat); ilgen.Emit(OpCodes.Ldloc, retval); //load local to stack ilgen.Emit(OpCodes.Unbox_Any, typeof(Int32)); //unbox int32 ilgen.Emit(OpCodes.Conv_R8); //convert to float64 ilgen.MarkLabel(done); // end of block lable next(); } } public int Arguments() { int i = 0; if (token == "(") next(); while (token != ")" && token != null) { Sum(); i++; if (token == ",") next(); } return i; } } }