In my previous post we’ve parsed an exception verifying whether it’s correct or not syntactically. But we still have to evaluate it. To be able to do that we’ll have to build an abstract syntax tree. This can be done by modifying the previous code and inserting semantic action. That means we do something more when we match productions.
An abstract syntax tree is a binary tree. The inner nodes will represent operators and leafs will be numerical values.
Here is how a node in the AST will look:

It is defined like this:
enum ASTNodeType
{
Undefined,
OperatorPlus,
OperatorMinus,
OperatorMul,
OperatorDiv,
UnaryMinus,
NumberValue
};
class ASTNode
{
public:
ASTNodeType Type;
double Value;
ASTNode* Left;
ASTNode* Right;
ASTNode()
{
Type = Undefined;
Value = 0;
Left = NULL;
Right = NULL;
}
~ASTNode()
{
delete Left;
delete Right;
}
};
For the expression 1+2*3, the AST will be:

We’ll build this tree by inserting semantic actions and adding nodes according to the following rules:

You’ll probably notice that based on these rules the AST shown above will be modified a little bit, with some additional nodes for operators + and *, having on the left a leaf node with the neutral element for the operation (zero for + and 1 for *), and on the right a node corresponding to a TERM or FACTOR. This won’t affect the evaluation.
The Parser class will change so that the functions corresponding to the non-terminal symbols EXP, EXP1, TERM, TERM1 and FACTOR will return an ASTNode* instead of void. That is the node created as a semantic action.
class Parser
{
Token m_crtToken;
const char* m_Text;
size_t m_Index;
private:
ASTNode* Expression()
{
ASTNode* tnode = Term();
ASTNode* e1node = Expression1();
return CreateNode(OperatorPlus, tnode, e1node);
}
ASTNode* Expression1()
{
ASTNode* tnode;
ASTNode* e1node;
switch(m_crtToken.Type)
{
case Plus:
GetNextToken();
tnode = Term();
e1node = Expression1();
return CreateNode(OperatorPlus, e1node, tnode);
case Minus:
GetNextToken();
tnode = Term();
e1node = Expression1();
return CreateNode(OperatorMinus, e1node, tnode);
}
return CreateNodeNumber(0);
}
ASTNode* Term()
{
ASTNode* fnode = Factor();
ASTNode* t1node = Term1();
return CreateNode(OperatorMul, fnode, t1node);
}
ASTNode* Term1()
{
ASTNode* fnode;
ASTNode* t1node;
switch(m_crtToken.Type)
{
case Mul:
GetNextToken();
fnode = Factor();
t1node = Term1();
return CreateNode(OperatorMul, t1node, fnode);
case Div:
GetNextToken();
fnode = Factor();
t1node = Term1();
return CreateNode(OperatorDiv, t1node, fnode);
}
return CreateNodeNumber(1);
}
ASTNode* Factor()
{
ASTNode* node;
switch(m_crtToken.Type)
{
case OpenParenthesis:
GetNextToken();
node = Expression();
Match(')');
return node;
case Minus:
GetNextToken();
node = Factor();
return CreateUnaryNode(node);
case Number:
{
double value = m_crtToken.Value;
GetNextToken();
return CreateNodeNumber(value);
}
default:
{
std::stringstream sstr;
sstr << "Unexpected token '" << m_crtToken.Symbol << "' at position " << m_Index;
throw ParserException(sstr.str(), m_Index);
}
}
}
ASTNode* CreateNode(ASTNodeType type, ASTNode* left, ASTNode* right)
{
ASTNode* node = new ASTNode;
node->Type = type;
node->Left = left;
node->Right = right;
return node;
}
ASTNode* CreateUnaryNode(ASTNode* left)
{
ASTNode* node = new ASTNode;
node->Type = UnaryMinus;
node->Left = left;
node->Right = NULL;
return node;
}
ASTNode* CreateNodeNumber(double value)
{
ASTNode* node = new ASTNode;
node->Type = NumberValue;
node->Value = value;
return node;
}
void Match(char expected)
{
if(m_Text[m_Index-1] == expected)
GetNextToken();
else
{
std::stringstream sstr;
sstr << "Expected token '" << expected << "' at position " << m_Index;
throw ParserException(sstr.str(), m_Index);
}
}
void SkipWhitespaces()
{
while(isspace(m_Text[m_Index])) m_Index++;
}
void GetNextToken()
{
SkipWhitespaces();
m_crtToken.Value = 0;
m_crtToken.Symbol = 0;
if(m_Text[m_Index] == 0)
{
m_crtToken.Type = EndOfText;
return;
}
if(isdigit(m_Text[m_Index]))
{
m_crtToken.Type = Number;
m_crtToken.Value = GetNumber();
return;
}
m_crtToken.Type = Error;
switch(m_Text[m_Index])
{
case '+': m_crtToken.Type = Plus; break;
case '-': m_crtToken.Type = Minus; break;
case '*': m_crtToken.Type = Mul; break;
case '/': m_crtToken.Type = Div; break;
case '(': m_crtToken.Type = OpenParenthesis; break;
case ')': m_crtToken.Type = ClosedParenthesis; break;
}
if(m_crtToken.Type != Error)
{
m_crtToken.Symbol = m_Text[m_Index];
m_Index++;
}
else
{
std::stringstream sstr;
sstr << "Unexpected token '" << m_Text[m_Index] << "' at position " << m_Index;
throw ParserException(sstr.str(), m_Index);
}
}
double GetNumber()
{
SkipWhitespaces();
int index = m_Index;
while(isdigit(m_Text[m_Index])) m_Index++;
if(m_Text[m_Index] == '.') m_Index++;
while(isdigit(m_Text[m_Index])) m_Index++;
if(m_Index - index == 0)
throw ParserException("Number expected but not found!", m_Index);
char buffer[32] = {0};
memcpy(buffer, &m_Text[index], m_Index - index);
return atof(buffer);
}
public:
ASTNode* Parse(const char* text)
{
m_Text = text;
m_Index = 0;
GetNextToken();
return Expression();
}
};
Now the Parse() method will return the created abstract syntax tree. We will see how to evaluate the expression by traversing this tree in the next post.










