Files
SingularityViewer/indra/llmessage/llmessagetemplateparser.cpp

762 lines
16 KiB
C++

/**
* @file llmessagetemplateparser.cpp
* @brief LLMessageTemplateParser implementation
*
* $LicenseInfo:firstyear=2007&license=viewerlgpl$
* Second Life Viewer Source Code
* Copyright (C) 2010, Linden Research, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation;
* version 2.1 of the License only.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
* $/LicenseInfo$
*/
#include "linden_common.h"
#include "llmessagetemplateparser.h"
#include <boost/tokenizer.hpp>
// What follows is a bunch of C functions to do validation.
// Let's support a small subset of regular expressions here
// Syntax is a string made up of:
// a - checks against alphanumeric ([A-Za-z0-9])
// c - checks against character ([A-Za-z])
// f - checks against first variable character ([A-Za-z_])
// v - checks against variable ([A-Za-z0-9_])
// s - checks against sign of integer ([-0-9])
// d - checks against integer digit ([0-9])
// * - repeat last check
// Checker for pattern code 'a': accepts one alphanumeric character
// ([A-Za-z0-9]).  Returns TRUE when c is accepted, FALSE otherwise.
BOOL b_return_alphanumeric_ok(char c)
{
    const bool is_upper = (c >= 'A') && (c <= 'Z');
    const bool is_lower = (c >= 'a') && (c <= 'z');
    const bool is_digit = (c >= '0') && (c <= '9');
    return (is_upper || is_lower || is_digit) ? TRUE : FALSE;
}
// Checker for pattern code 'c': accepts one letter ([A-Za-z]).
// Returns TRUE when c is accepted, FALSE otherwise.
BOOL b_return_character_ok(char c)
{
    const bool is_upper = (c >= 'A') && (c <= 'Z');
    const bool is_lower = (c >= 'a') && (c <= 'z');
    return (is_upper || is_lower) ? TRUE : FALSE;
}
// Checker for pattern code 'f': accepts a character that may start a
// variable name ([A-Za-z_]).  Returns TRUE when c is accepted, FALSE otherwise.
BOOL b_return_first_variable_ok(char c)
{
    const bool is_upper = (c >= 'A') && (c <= 'Z');
    const bool is_lower = (c >= 'a') && (c <= 'z');
    const bool is_underscore = (c == '_');
    return (is_upper || is_lower || is_underscore) ? TRUE : FALSE;
}
// Checker for pattern code 'v': accepts a character that may appear inside a
// variable name ([A-Za-z0-9_]).  Returns TRUE when c is accepted, FALSE
// otherwise.
BOOL b_return_variable_ok(char c)
{
    const bool is_upper = (c >= 'A') && (c <= 'Z');
    const bool is_lower = (c >= 'a') && (c <= 'z');
    const bool is_digit = (c >= '0') && (c <= '9');
    const bool is_underscore = (c == '_');
    return (is_upper || is_lower || is_digit || is_underscore) ? TRUE : FALSE;
}
// Checker for pattern code 's': accepts a digit or a minus sign ([-0-9]).
// Returns TRUE when c is accepted, FALSE otherwise.
BOOL b_return_signed_integer_ok(char c)
{
    const bool is_digit = (c >= '0') && (c <= '9');
    const bool is_minus = (c == '-');
    return (is_digit || is_minus) ? TRUE : FALSE;
}
// Checker for pattern code 'd': accepts one decimal digit ([0-9]).
// Returns TRUE when c is accepted, FALSE otherwise.
BOOL b_return_integer_ok(char c)
{
    const bool is_digit = (c >= '0') && (c <= '9');
    return is_digit ? TRUE : FALSE;
}
// Table of single-character checkers used by b_check_token().
// The position of each entry must match the index get_checker_number()
// returns for its pattern code: 'a' = 0, 'c' = 1, 'f' = 2, 'v' = 3,
// 's' = 4, 'd' = 5.
BOOL (*gParseCheckCharacters[])(char c) =
{
b_return_alphanumeric_ok, // 'a'
b_return_character_ok, // 'c'
b_return_first_variable_ok, // 'f'
b_return_variable_ok, // 'v'
b_return_signed_integer_ok, // 's'
b_return_integer_ok // 'd'
};
// Translate a pattern code into an index into gParseCheckCharacters.
// Returns 0-5 for 'a', 'c', 'f', 'v', 's', 'd' respectively, the sentinel
// 9999 for the repeat marker '*', and -1 for anything else (including the
// terminating NUL of a pattern).
S32 get_checker_number(char checker)
{
    if (checker == '*')
    {
        return 9999;
    }

    // The position of a code in this list is the value returned for it.
    static const char checker_codes[] = { 'a', 'c', 'f', 'v', 's', 'd' };
    const S32 code_count = (S32)(sizeof(checker_codes) / sizeof(checker_codes[0]));
    for (S32 index = 0; index < code_count; ++index)
    {
        if (checker_codes[index] == checker)
        {
            return index;
        }
    }
    return -1;
}
// Check a token against a simplified regular expression.
//
// token  - NUL-terminated string to validate.
// regexp - pattern made of the codes understood by get_checker_number():
//          one code per token character, optionally followed by '*' to keep
//          applying the previous code to every remaining character.
//
// Returns TRUE when every character of the token is accepted by the checker
// that applies at its position, FALSE otherwise.  An empty token is rejected,
// because no checker has accepted anything.  A malformed pattern (unknown
// first code, leading '*', or a pattern that runs out before the token does)
// is reported through LL_ERRS.
BOOL b_check_token(const char *token, const char *regexp)
{
    S32 rptr = 0;
    S32 next_checker = 0;
    S32 current_checker = get_checker_number(regexp[rptr++]);

    if (current_checker == -1)
    {
        LL_ERRS() << "Invalid regular expression value!" << LL_ENDL;
        return FALSE;
    }
    if (current_checker == 9999)
    {
        LL_ERRS() << "Regular expression can't start with *!" << LL_ENDL;
        return FALSE;
    }

    // Without this guard the loop below would not run at all and an empty
    // string would be reported as a valid variable name or integer.
    if (token[0] == '\0')
    {
        return FALSE;
    }

    for (S32 tptr = 0; token[tptr]; tptr++)
    {
        // -1 here means the pattern ended (or held an unknown code) while
        // there are still token characters left to check.
        if (current_checker == -1)
        {
            LL_ERRS() << "Input exceeds regular expression!\nDid you forget a *?" << LL_ENDL;
            return FALSE;
        }
        if (!gParseCheckCharacters[current_checker](token[tptr]))
        {
            return FALSE;
        }
        // Once '*' has been seen, stop reading the pattern and keep using
        // the current checker for the rest of the token.
        if (next_checker != 9999)
        {
            next_checker = get_checker_number(regexp[rptr++]);
            if (next_checker != 9999)
            {
                current_checker = next_checker;
            }
        }
    }
    return TRUE;
}
// A C variable name consists of letters, digits and underscores and must not
// start with a digit.  Returns TRUE when the token matches the pattern "fv*";
// otherwise logs a warning naming the token and returns FALSE.
BOOL b_variable_ok(const char *token)
{
    if (b_check_token(token, "fv*"))
    {
        return TRUE;
    }
    LL_WARNS() << "Token '" << token << "' isn't a variable!" << LL_ENDL;
    return FALSE;
}
// A signed integer: digits 0-9, optionally preceded by '-'.  Returns TRUE when
// the token matches the pattern "sd*"; otherwise logs a warning and returns
// FALSE.
BOOL b_integer_ok(const char *token)
{
    if (b_check_token(token, "sd*"))
    {
        return TRUE;
    }
    LL_WARNS() << "Token isn't an integer!" << LL_ENDL;
    return FALSE;
}
// An unsigned integer: digits 0-9 only.  Returns TRUE when the token matches
// the pattern "d*"; otherwise logs a warning and returns FALSE.
BOOL b_positive_integer_ok(const char *token)
{
    if (b_check_token(token, "d*"))
    {
        return TRUE;
    }
    LL_WARNS() << "Token isn't an integer!" << LL_ENDL;
    return FALSE;
}
// Done with C functions, here's the tokenizer.
typedef boost::tokenizer< boost::char_separator<char> > tokenizer;

// Split the template text into tokens separated by spaces or tabs, recording
// for each token the number of the line it was found on.  A word starting
// with '/' begins a comment: it and the rest of its line are dropped.
// After construction mCurrent refers to the first token and mStarted is false.
LLTemplateTokenizer::LLTemplateTokenizer(const std::string & contents) : mStarted(false), mTokens()
{
    // keep_empty_tokens: blank lines still come out as (empty) line tokens,
    // so they advance the line counter.
    // NOTE(review): '\r' and '\n' are both separator characters here, so a
    // CRLF pair looks like it yields an extra empty line and would inflate
    // the reported line numbers for CRLF files -- confirm.
    boost::char_separator<char> line_separator("\r\n", "", boost::keep_empty_tokens);
    boost::char_separator<char> word_separator(" \t");

    tokenizer lines(contents, line_separator);
    U32 line_number = 1;
    for (tokenizer::iterator line_it = lines.begin(); line_it != lines.end(); ++line_it)
    {
        tokenizer words(*line_it, word_separator);
        tokenizer::iterator word_it = words.begin();
        // Stop at the end of the line or at the first comment word.
        while (word_it != words.end() && (*word_it)[0] != '/')
        {
            positioned_token entry;
            entry.str = std::string(*word_it);
            entry.line = line_number;
            mTokens.push_back(entry);
            ++word_it;
        }
        ++line_number;
    }

    mCurrent = mTokens.begin();
}
// Advance to the next token.  The very first call only marks the tokenizer
// as started and positions it on the first token; later calls step forward.
// Calling this at end of input reports an error instead of moving.
void LLTemplateTokenizer::inc()
{
    if (atEOF())
    {
        error("trying to increment token of EOF");
        return;
    }
    if (!mStarted)
    {
        mStarted = true;
        mCurrent = mTokens.begin();
        return;
    }
    ++mCurrent;
}
// Step back one token.  On the first token this returns to the not-started
// state; stepping back again from there reports an error.
void LLTemplateTokenizer::dec()
{
    if (mCurrent != mTokens.begin())
    {
        --mCurrent;
        return;
    }
    if (!mStarted)
    {
        error("trying to decrement past beginning of file");
        return;
    }
    mStarted = false;
}
// Return the text of the current token.
// At end of input this reports an error through error(); should that call
// return, an empty string is handed back rather than dereferencing the end
// iterator of the token list.
std::string LLTemplateTokenizer::get() const
{
    if (atEOF())
    {
        error("trying to get EOF");
        return std::string();
    }
    return mCurrent->str;
}
// Line number recorded for the current token, or 0 at end of input.
U32 LLTemplateTokenizer::line() const
{
    return atEOF() ? 0 : mCurrent->line;
}
// True when the current position is past the last token (this is also the
// case from the start when there are no tokens at all).
bool LLTemplateTokenizer::atEOF() const
{
    const bool exhausted = (mCurrent == mTokens.end());
    return exhausted;
}
// Advance with inc() and return the token now current, as given by get().
std::string LLTemplateTokenizer::next()
{
    inc();
    const std::string token_text = get();
    return token_text;
}
// Consume the next token only if it equals `token`.
// Returns true and stays on that token when it matches; otherwise returns
// false.  On a mismatch the position is restored with dec().  When advancing
// reaches end of input the position is left there (no dec()).
bool LLTemplateTokenizer::want(const std::string & token)
{
    if (atEOF())
    {
        return false;
    }

    inc();
    if (atEOF())
    {
        return false;
    }

    const bool matched = (get() == token);
    if (!matched)
    {
        dec(); // undo the advance
    }
    return matched;
}
// Returns true when there is no further token: either the tokenizer is
// already at end of input or advancing once reaches it.  If another token
// does follow, the advance is undone with dec() and false is returned.
bool LLTemplateTokenizer::wantEOF()
{
    if (!atEOF())
    {
        inc();
        if (!atEOF())
        {
            dec(); // undo the advance
            return false;
        }
    }
    return true;
}
// Report a parse problem through LL_ERRS.  When a current token exists the
// report includes its line number and text; at end of input it is reported
// as an unexpected end of file.
void LLTemplateTokenizer::error(std::string message) const
{
    if (!atEOF())
    {
        LL_ERRS() << "Problem parsing message template at line "
            << line() << ", with token '" << get() << "' : "
            << message << LL_ENDL;
        return;
    }
    LL_ERRS() << "Unexpected end of file: " << message << LL_ENDL;
}
// Done with tokenizer, next is the parser.
// Parse a whole message template from `tokens`.
// The input must start with "version <number>"; message definitions follow
// until parseMessage() returns NULL, after which end of input is required.
// Messages marked MD_DEPRECATED are deleted instead of being stored in
// mMessages.
LLTemplateParser::LLTemplateParser(LLTemplateTokenizer & tokens):
    mVersion(0.f),
    mMessages()
{
    if (!tokens.want("version"))
    {
        LL_ERRS() << "Version must be first in the message template, found "
            << tokens.next() << LL_ENDL;
    }
    else
    {
        // the token after "version" is the version number
        const std::string version_text = tokens.next();
        mVersion = static_cast<F32>(atof(version_text.c_str()));
        LL_INFOS() << "### Message template version " << mVersion << " ###" << LL_ENDL;
    }

    for (LLMessageTemplate * parsed = parseMessage(tokens);
         parsed != NULL;
         parsed = parseMessage(tokens))
    {
        if (parsed->getDeprecation() == MD_DEPRECATED)
        {
            delete parsed;
            continue;
        }
        mMessages.push_back(parsed);
    }

    if (!tokens.wantEOF())
    {
        LL_ERRS() << "Expected end of template or a message, instead found: "
            << tokens.next() << " at " << tokens.line() << LL_ENDL;
    }
}
// Version number read from the "version" entry of the template by the
// constructor (0 if none was read).
F32 LLTemplateParser::getVersion() const
{
return mVersion;
}
// Iterator to the first message template stored in mMessages.
LLTemplateParser::message_iterator LLTemplateParser::getMessagesBegin() const
{
return mMessages.begin();
}
// Past-the-end iterator for the message templates stored in mMessages.
LLTemplateParser::message_iterator LLTemplateParser::getMessagesEnd() const
{
return mMessages.end();
}
// static
// Parse one message definition of the form
//   { Name Frequency Number Trust Encoding [Deprecation] { block } ... }
// Returns a newly allocated LLMessageTemplate, or NULL (consuming nothing
// that matters) when the next token is not "{".  Malformed input is reported
// through LL_ERRS.
LLMessageTemplate * LLTemplateParser::parseMessage(LLTemplateTokenizer & tokens)
{
    if(!tokens.want("{"))
    {
        return NULL;
    }

    // name first
    std::string template_name = tokens.next();

    // is name a legit C variable name
    if (!b_variable_ok(template_name.c_str()))
    {
        LL_ERRS() << "Not legit variable name: " << template_name << " at " << tokens.line() << LL_ENDL;
    }

    // ok, now get Frequency ("High", "Medium", "Low" or "Fixed");
    // "Fixed" is handled as low frequency.
    EMsgFrequency frequency = MFT_LOW;
    std::string freq_string = tokens.next();
    if (freq_string == "High")
    {
        frequency = MFT_HIGH;
    }
    else if (freq_string == "Medium")
    {
        frequency = MFT_MEDIUM;
    }
    else if (freq_string == "Low" || freq_string == "Fixed")
    {
        frequency = MFT_LOW;
    }
    else
    {
        LL_ERRS() << "Expected frequency, got " << freq_string << " at " << tokens.line() << LL_ENDL;
    }

    // Message number.  Base 0 lets strtoul accept decimal, octal and 0x-hex.
    // The end pointer is checked so that a token that is not a number at all,
    // or has trailing junk, is reported instead of silently becoming 0 or a
    // truncated value.
    const std::string number_string = tokens.next();
    char * number_end = NULL;
    U32 message_number = strtoul(number_string.c_str(), &number_end, 0);
    if (number_end == number_string.c_str() || *number_end != '\0')
    {
        LL_ERRS() << "Expected message number, got " << number_string << " at " << tokens.line() << LL_ENDL;
    }

    // Fold the frequency prefix into the number.  Unsigned literals are used
    // so that shifting 255 into the top byte never overflows a signed int.
    switch (frequency)
    {
    case MFT_HIGH:
        break;
    case MFT_MEDIUM:
        message_number = (255U << 8) | message_number;
        break;
    case MFT_LOW:
        message_number = (255U << 24) | (255U << 16) | message_number;
        break;
    default:
        LL_ERRS() << "Unknown frequency enum: " << frequency << LL_ENDL;
    }

    LLMessageTemplate * templatep = new LLMessageTemplate(
        template_name.c_str(),
        message_number,
        frequency);

    // Now get trust ("Trusted", "NotTrusted")
    std::string trust = tokens.next();
    if (trust == "Trusted")
    {
        templatep->setTrust(MT_TRUST);
    }
    else if (trust == "NotTrusted")
    {
        templatep->setTrust(MT_NOTRUST);
    }
    else
    {
        LL_ERRS() << "Bad trust " << trust << " at " << tokens.line() << LL_ENDL;
    }

    // get encoding
    std::string encoding = tokens.next();
    if(encoding == "Unencoded")
    {
        templatep->setEncoding(ME_UNENCODED);
    }
    else if(encoding == "Zerocoded")
    {
        templatep->setEncoding(ME_ZEROCODED);
    }
    else
    {
        LL_ERRS() << "Bad encoding " << encoding << " at " << tokens.line() << LL_ENDL;
    }

    // get deprecation (optional)
    if(tokens.want("Deprecated"))
    {
        templatep->setDeprecation(MD_DEPRECATED);
    }
    else if (tokens.want("UDPDeprecated"))
    {
        templatep->setDeprecation(MD_UDPDEPRECATED);
    }
    else if (tokens.want("UDPBlackListed"))
    {
        templatep->setDeprecation(MD_UDPBLACKLISTED);
    }
    else if (tokens.want("NotDeprecated"))
    {
        // this is the default value, but it can't hurt to set it twice
        templatep->setDeprecation(MD_NOTDEPRECATED);
    }
    else
    {
        // It's probably a brace, let's just start block processing
    }

    // blocks follow until parseBlock() finds no opening brace
    while(LLMessageBlock * blockp = parseBlock(tokens))
    {
        templatep->addBlock(blockp);
    }

    if(!tokens.want("}"))
    {
        LL_ERRS() << "Expecting closing } for message " << template_name
            << " at " << tokens.line() << LL_ENDL;
    }
    return templatep;
}
// static
// Parse one block definition of the form
//   { Name Single|Variable|Multiple <count> { variable } ... }
// Returns a newly allocated LLMessageBlock, or NULL when the next token is
// not "{".  Malformed input is reported through LL_ERRS.
LLMessageBlock * LLTemplateParser::parseBlock(LLTemplateTokenizer & tokens)
{
    if (!tokens.want("{"))
    {
        return NULL;
    }

    // the block name must be a legal C variable name
    const std::string block_name = tokens.next();
    if (!b_variable_ok(block_name.c_str()))
    {
        LL_ERRS() << "not a legal block name: " << block_name
            << " at " << tokens.line() << LL_ENDL;
    }

    // block type: "Single", "Multiple" (followed by a count) or "Variable"
    const std::string block_type = tokens.next();
    LLMessageBlock * blockp = NULL;
    if (block_type == "Multiple")
    {
        // the repeat count comes next and must be an unsigned integer
        const std::string repeats = tokens.next();
        if (!b_positive_integer_ok(repeats.c_str()))
        {
            LL_ERRS() << "not a legal integer for block multiple count: "
                << repeats << " at " << tokens.line() << LL_ENDL;
        }
        blockp = new LLMessageBlock(block_name.c_str(),
                                    MBT_MULTIPLE,
                                    atoi(repeats.c_str()));
    }
    else if (block_type == "Single")
    {
        blockp = new LLMessageBlock(block_name.c_str(), MBT_SINGLE);
    }
    else if (block_type == "Variable")
    {
        blockp = new LLMessageBlock(block_name.c_str(), MBT_VARIABLE);
    }
    else
    {
        LL_ERRS() << "bad block type: " << block_type
            << " at " << tokens.line() << LL_ENDL;
    }

    // Each parsed variable is copied into the block by name, type and size,
    // then the temporary object is released.
    LLMessageVariable * varp = parseVariable(tokens);
    while (varp != NULL)
    {
        blockp->addVariable(varp->getName(),
                            varp->getType(),
                            varp->getSize());
        delete varp;
        varp = parseVariable(tokens);
    }

    if (!tokens.want("}"))
    {
        LL_ERRS() << "Expecting closing } for block " << block_name
            << " at " << tokens.line() << LL_ENDL;
    }
    return blockp;
}
// static
// Parse one variable definition of the form
//   { Name Type }            for types with an implied size, or
//   { Name Fixed|Variable <size> }
// Returns a newly allocated LLMessageVariable, or NULL when the next token
// is not "{".  Malformed input is reported through LL_ERRS.
LLMessageVariable * LLTemplateParser::parseVariable(LLTemplateTokenizer & tokens)
{
    // Type names whose size is implied by the type itself, with the enum
    // value and size handed to LLMessageVariable for each.
    struct ImpliedSizeType
    {
        const char * name;
        EMsgVariableType type;
        S32 size;
    };
    static const ImpliedSizeType implied_size_types[] =
    {
        { "U8",           MVT_U8,           1 },
        { "U16",          MVT_U16,          2 },
        { "U32",          MVT_U32,          4 },
        { "U64",          MVT_U64,          8 },
        { "S8",           MVT_S8,           1 },
        { "S16",          MVT_S16,          2 },
        { "S32",          MVT_S32,          4 },
        { "S64",          MVT_S64,          8 },
        { "F32",          MVT_F32,          4 },
        { "F64",          MVT_F64,          8 },
        { "LLVector3",    MVT_LLVector3,    12 },
        { "LLVector3d",   MVT_LLVector3d,   24 },
        { "LLVector4",    MVT_LLVector4,    16 },
        { "LLQuaternion", MVT_LLQuaternion, 12 },
        { "LLUUID",       MVT_LLUUID,       16 },
        { "BOOL",         MVT_BOOL,         1 },
        { "IPADDR",       MVT_IP_ADDR,      4 },
        { "IPPORT",       MVT_IP_PORT,      2 }
    };
    const size_t implied_size_type_count =
        sizeof(implied_size_types) / sizeof(implied_size_types[0]);

    if(!tokens.want("{"))
    {
        return NULL;
    }

    // the variable name must be a legal C variable name
    std::string var_name = tokens.next();
    if (!b_variable_ok(var_name.c_str()))
    {
        LL_ERRS() << "Not a legit variable name: " << var_name
            << " at " << tokens.line() << LL_ENDL;
    }

    LLMessageVariable * varp = NULL;
    std::string var_type = tokens.next();
    const bool is_variable = (var_type == "Variable");
    if (is_variable || var_type == "Fixed")
    {
        // "Fixed" and "Variable" carry an explicit size token
        std::string variable_size = tokens.next();
        if (!b_positive_integer_ok(variable_size.c_str()))
        {
            LL_ERRS() << "not a legal integer variable size: " << variable_size
                << " at " << tokens.line() << LL_ENDL;
        }
        varp = new LLMessageVariable(
            var_name.c_str(),
            is_variable ? MVT_VARIABLE : MVT_FIXED,
            atoi(variable_size.c_str()));
    }
    else
    {
        for (size_t i = 0; i < implied_size_type_count; ++i)
        {
            const ImpliedSizeType & entry = implied_size_types[i];
            if (var_type == entry.name)
            {
                varp = new LLMessageVariable(var_name.c_str(), entry.type, entry.size);
                break;
            }
        }
        if (varp == NULL)
        {
            LL_ERRS() << "bad variable type:" << var_type
                << " at " << tokens.line() << LL_ENDL;
        }
    }

    if(!tokens.want("}"))
    {
        LL_ERRS() << "Expecting closing } for variable " << var_name
            << " at " << tokens.line() << LL_ENDL;
    }
    return varp;
}