For some reason, the Reflection namespace in .Net is one of my favorites. Particularly interesting is the Reflection.Emit namespace. If your not familiar, it’s a collection of classes that allows you to create new assemblies, classes, and methods at run time. The DynamicMethod class gives you a way to generate a method and create a delegate to call it. This requires some knowledge of MSIL, an assembly style language that .Net languages compile to before they are converted to true machine language. Since I was bored this week at work, I combined this with a something I learned from back in my college days. Recursive Descent Parsing is a method often used for compiling expressions into assembly style instructions.
The use case for this is simple, you have some need for users to input simple numerical formula into your system, and quickly execute those functions against various inputs and store the results. The following code takes a standard math expression, combined with variables and functions, and compiles it into a dynamic method that can be executed. Variable values and function calls are handled by a callback function that receives the name of the symbol and any arguments supplied.
The most difficult part of working with IL is generating valid code. The most common error messages are usually ‘This operation could destabilize the runtime.’ or ‘An invalid program was detected.’ Neither of which is very helpful. The most common way trouble shoot IL generation is to write the code you want to simulate in C# or VB, compile it, and use the ILDASM disassembly tool that comes with Visual Studio to examine the code. All I can suggest is to look at your code carefully and be sure of the state of the stack, and what type all your values are at each point. I attempted to comment most of the code below in the IL generation portion so you can follow what I was doing.
One last note about the below code, I wrote it quick and didn’t put any verification into the parser. That is, it will likely accept badly formed expressions, generating invalid code or throwing meaningless errors. I’ll leave adding proper error messages to the parsing section an exercise for the reader. Have fun.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Reflection;
using System.Reflection.Emit;
/*
* This is a simple example of a recursive descent parser combined
* with dynamic methods to Compile simple expressions stored in a string.
* This example lacks error feedback for invalid expressions (invalid expressions may generate invalid code)
* This example handles limited numeric types only (would need additional type conversion logic added to handle strings etc.).
*/
namespace ExpressionEvaluatorExample
{
class Program
{
static void Main(string[] args)
{
//Callback delegate for resolving Variables and functions
Func<string, Array, object > resolve = (String name, Array arg) =>
{
switch (name)
{
case "Var1":
return 5;
case "Var2":
return 6;
case "Pow":
return System.Math.Pow((double)arg.GetValue(0), (double)arg.GetValue(1));
}
return 0;
};
var FE = new functionEvaluator();
FE.Tokenize("Pow(4, 2)+Var2*(10+5)"); //106
var fx = FE.Compile();
Console.WriteLine("Result: " + fx(resolve).ToString());
Console.ReadLine();
}
}
class functionEvaluator
{
/* Supported Grammar
* Sum = Mul [+|- Mul]
* Mul = Factor [*|/ Factor]
* Factor = Number | (Sum)
*/
private List<string> _tokens;
private int index = 0;
private string token
{
get {
if(index < _tokens.Count )
return _tokens[index];
else
return null;
}
}
private string peek()
{
if (index < _tokens.Count)
return _tokens[index+1];
else
return null;
}
private bool next()
{
if (index < _tokens.Count)
{
index++;
return true;
}
else
return false;
}
public void Tokenize(string expr)
{
//Tokenize regex adapted from: http://www.codeproject.com/Tips/371647/Converting-InFix-to-PostFix-by-Recursive-Descenden
string syntax = @"\s*(\d+|'.*?'|[-+*/%(),]|[^\s-+*/%(),]+)\s*";
_tokens = Regex.Matches(expr, syntax, RegexOptions.Compiled)
.Cast<Match>().Select(t => t.Groups[1].Value.Trim()).ToList() ;
}
private ILGenerator ilgen;
public Func<Func<string, Array, object>, object> Compile()
{
var method = new DynamicMethod("calculate", typeof(object), new Type[] { typeof(Func<string, Array, object>) });
ilgen = method.GetILGenerator();
Sum();
ilgen.Emit(OpCodes.Box, typeof(double)); //box the return type
ilgen.Emit(OpCodes.Ret); //return
return (Func<Func<string, Array, object>, object>)method.CreateDelegate(typeof(Func<Func<string, Array, object>, object>));
}
public void Sum()
{
Mul();
if (token == "+")
{
next();
Mul();
ilgen.Emit(OpCodes.Add);
}
else if (token == "-")
{
next();
Mul();
ilgen.Emit(OpCodes.Sub);
}
}
public void Mul()
{
Factor();
if (token == "*")
{
next();
Factor();
ilgen.Emit(OpCodes.Mul);
}
else if (token == "/")
{
next();
Factor();
ilgen.Emit(OpCodes.Div);
}
}
public void Factor()
{
if (token == "(")
{
next();
Sum();
}
else
{
Symbol();
}
}
public void Symbol()
{
double value;
if (double.TryParse(token, out value))
{
ilgen.Emit(OpCodes.Ldc_R8, value); //load numeric literal to the stack as a double
next();
}
else
{
ilgen.Emit(OpCodes.Ldarg_0); //load first argument (pointer to delegate)
ilgen.Emit(OpCodes.Ldstr, token); //load token name to stack
if (peek() == "(")
{
//function arguments
next();
int args = Arguments();
if (args > 0)
{
var arry = ilgen.DeclareLocal(typeof(Array ));
var argval = ilgen.DeclareLocal(typeof(double));
ilgen.Emit(OpCodes.Ldtoken, typeof(object)); //load type token
ilgen.Emit(OpCodes.Call, typeof(System.Type).GetMethod("GetTypeFromHandle")); //get type from handle
ilgen.Emit(OpCodes.Ldc_I4 , args); //load number of elements
ilgen.Emit(OpCodes.Call, typeof(Array).GetMethod("CreateInstance", new Type[] { typeof(Type), typeof(int) })); //call array object factory
ilgen.Emit(OpCodes.Stloc, arry.LocalIndex); // store array in local
for(int i=0;i<args;i++)
{
ilgen.Emit(OpCodes.Stloc, argval.LocalIndex ); //pop argument from stack
ilgen.Emit(OpCodes.Ldloc, arry.LocalIndex); //load array on stack
ilgen.Emit(OpCodes.Castclass, typeof(object[])); //cast to object array
ilgen.Emit(OpCodes.Ldc_I4, i); //load index on stack
ilgen.Emit(OpCodes.Ldloc, argval.LocalIndex ); //load local back on stack
ilgen.Emit(OpCodes.Box, typeof(double)); //box value
ilgen.Emit(OpCodes.Stelem_Ref); //store in array
}
ilgen.Emit(OpCodes.Ldloc, arry.LocalIndex); // push array to stack
}
else
{
ilgen.Emit(OpCodes.Ldnull); //load null reference
}
}
else
{
ilgen.Emit(OpCodes.Ldnull); //load null reference
}
//call the delegate
ilgen.Emit(OpCodes.Callvirt, typeof(Func<string, Array, object>).GetMethod("Invoke", new Type[] {typeof(string), typeof(object[])}));
//store result in local var
var retval = ilgen.DeclareLocal(typeof(object));
ilgen.Emit(OpCodes.Stloc, retval); // store local
ilgen.Emit(OpCodes.Ldloc, retval); // put it back on the stack
var intToFloat = ilgen.DefineLabel(); // Label for branching
var done = ilgen.DefineLabel(); // Label for branching
//test type, branch to conversion
ilgen.Emit(OpCodes.Isinst, typeof(Int32 )); //Test if boxed type is integer
ilgen.Emit(OpCodes.Brtrue, intToFloat); //Branch to int32 conversion if true
//case else
ilgen.Emit(OpCodes.Ldloc, retval); //load from local
ilgen.Emit(OpCodes.Unbox_Any, typeof(double)); //unbox double
ilgen.Emit(OpCodes.Br, done); //branch to end of block
//convert int32 to fload
ilgen.MarkLabel(intToFloat);
ilgen.Emit(OpCodes.Ldloc, retval); //load local to stack
ilgen.Emit(OpCodes.Unbox_Any, typeof(Int32)); //unbox int32
ilgen.Emit(OpCodes.Conv_R8); //convert to float64
ilgen.MarkLabel(done); // end of block lable
next();
}
}
public int Arguments()
{
int i = 0;
if (token == "(") next();
while (token != ")" && token != null)
{
Sum();
i++;
if (token == ",") next();
}
return i;
}
}
}