264 lines
7.5 KiB
C++
264 lines
7.5 KiB
C++
/**
|
|
* @file llurlregistry.cpp
|
|
* @author Martin Reddy
|
|
* @brief Contains a set of Url types that can be matched in a string
|
|
*
|
|
* $LicenseInfo:firstyear=2009&license=viewerlgpl$
|
|
* Second Life Viewer Source Code
|
|
* Copyright (C) 2010, Linden Research, Inc.
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation;
|
|
* version 2.1 of the License only.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*
|
|
* Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
|
|
* $/LicenseInfo$
|
|
*/
|
|
|
|
#include "linden_common.h"
|
|
#include "llurlregistry.h"
|
|
|
|
#include <boost/regex.hpp>
|
|
|
|
// default dummy callback that ignores any label updates from the server
|
|
void LLUrlRegistryNullCallback(const std::string &url, const std::string &label, const std::string& icon)
|
|
{
|
|
}
|
|
|
|
LLUrlRegistry::LLUrlRegistry()
|
|
{
|
|
mUrlEntry.reserve(20);
|
|
|
|
// Urls are matched in the order that they were registered
|
|
registerUrl(new LLUrlEntryNoLink());
|
|
registerUrl(new LLUrlEntryIcon());
|
|
registerUrl(new LLUrlEntrySLURL());
|
|
registerUrl(new LLUrlEntryHTTP());
|
|
registerUrl(new LLUrlEntryHTTPLabel());
|
|
registerUrl(new LLUrlEntryAgentCompleteName());
|
|
registerUrl(new LLUrlEntryAgentDisplayName());
|
|
registerUrl(new LLUrlEntryAgentUserName());
|
|
// LLUrlEntryAgent*Name must appear before LLUrlEntryAgent since
|
|
// LLUrlEntryAgent is a less specific (catchall for agent urls)
|
|
registerUrl(new LLUrlEntryAgent());
|
|
registerUrl(new LLUrlEntryGroup());
|
|
registerUrl(new LLUrlEntryParcel());
|
|
registerUrl(new LLUrlEntryTeleport());
|
|
registerUrl(new LLUrlEntryRegion());
|
|
registerUrl(new LLUrlEntryWorldMap());
|
|
registerUrl(new LLUrlEntryObjectIM());
|
|
registerUrl(new LLUrlEntryPlace());
|
|
registerUrl(new LLUrlEntryInventory());
|
|
registerUrl(new LLUrlEntryObjectIM());
|
|
//LLUrlEntrySL and LLUrlEntrySLLabel have more common pattern,
|
|
//so it should be registered in the end of list
|
|
registerUrl(new LLUrlEntrySL());
|
|
registerUrl(new LLUrlEntrySLLabel());
|
|
// most common pattern is a URL without any protocol,
|
|
// e.g., "secondlife.com"
|
|
registerUrl(new LLUrlEntryHTTPNoProtocol());
|
|
}
|
|
|
|
LLUrlRegistry::~LLUrlRegistry()
|
|
{
|
|
// free all of the LLUrlEntryBase objects we are holding
|
|
std::vector<LLUrlEntryBase *>::iterator it;
|
|
for (it = mUrlEntry.begin(); it != mUrlEntry.end(); ++it)
|
|
{
|
|
delete *it;
|
|
}
|
|
}
|
|
|
|
void LLUrlRegistry::registerUrl(LLUrlEntryBase *url, bool force_front)
|
|
{
|
|
if (url)
|
|
{
|
|
if (force_front) // IDEVO
|
|
mUrlEntry.insert(mUrlEntry.begin(), url);
|
|
else
|
|
mUrlEntry.push_back(url);
|
|
}
|
|
}
|
|
|
|
static bool matchRegex(const char *text, boost::regex regex, U32 &start, U32 &end)
|
|
{
|
|
boost::cmatch result;
|
|
bool found;
|
|
|
|
// regex_search can potentially throw an exception, so check for it
|
|
try
|
|
{
|
|
found = boost::regex_search(text, result, regex);
|
|
}
|
|
catch (std::runtime_error &)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if (! found)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
// return the first/last character offset for the matched substring
|
|
start = static_cast<U32>(result[0].first - text);
|
|
end = static_cast<U32>(result[0].second - text) - 1;
|
|
|
|
// we allow certain punctuation to terminate a Url but not match it,
|
|
// e.g., "http://foo.com/." should just match "http://foo.com/"
|
|
if (text[end] == '.' || text[end] == ',')
|
|
{
|
|
end--;
|
|
}
|
|
// ignore a terminating ')' when Url contains no matching '('
|
|
// see DEV-19842 for details
|
|
else if (text[end] == ')' && std::string(text+start, end-start).find('(') == std::string::npos)
|
|
{
|
|
end--;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool stringHasUrl(const std::string &text)
|
|
{
|
|
// fast heuristic test for a URL in a string. This is used
|
|
// to avoid lots of costly regex calls, BUT it needs to be
|
|
// kept in sync with the LLUrlEntry regexes we support.
|
|
return (text.find("://") != std::string::npos ||
|
|
text.find("www.") != std::string::npos ||
|
|
text.find(".com") != std::string::npos ||
|
|
text.find(".net") != std::string::npos ||
|
|
text.find(".edu") != std::string::npos ||
|
|
text.find(".org") != std::string::npos ||
|
|
text.find("<nolink>") != std::string::npos ||
|
|
text.find("<icon") != std::string::npos);
|
|
}
|
|
|
|
bool LLUrlRegistry::findUrl(const std::string &text, LLUrlMatch &match, const LLUrlLabelCallback &cb)
|
|
{
|
|
// avoid costly regexes if there is clearly no URL in the text
|
|
if (! stringHasUrl(text))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
// find the first matching regex from all url entries in the registry
|
|
U32 match_start = 0, match_end = 0;
|
|
LLUrlEntryBase *match_entry = NULL;
|
|
|
|
std::vector<LLUrlEntryBase *>::iterator it;
|
|
for (it = mUrlEntry.begin(); it != mUrlEntry.end(); ++it)
|
|
{
|
|
LLUrlEntryBase *url_entry = *it;
|
|
|
|
U32 start = 0, end = 0;
|
|
if (matchRegex(text.c_str(), url_entry->getPattern(), start, end))
|
|
{
|
|
// does this match occur in the string before any other match
|
|
if (start < match_start || match_entry == NULL)
|
|
{
|
|
match_start = start;
|
|
match_end = end;
|
|
match_entry = url_entry;
|
|
}
|
|
}
|
|
}
|
|
|
|
// did we find a match? if so, return its details in the match object
|
|
if (match_entry)
|
|
{
|
|
// fill in the LLUrlMatch object and return it
|
|
std::string url = text.substr(match_start, match_end - match_start + 1);
|
|
match.setValues(match_start, match_end,
|
|
match_entry->getUrl(url),
|
|
match_entry->getLabel(url, cb),
|
|
match_entry->getTooltip(url),
|
|
match_entry->getIcon(url),
|
|
//match_entry->getStyle(),
|
|
match_entry->getMenuName(),
|
|
match_entry->getLocation(url),
|
|
match_entry->getID(url),
|
|
match_entry->underlineOnHoverOnly(url));
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool LLUrlRegistry::findUrl(const LLWString &text, LLUrlMatch &match, const LLUrlLabelCallback &cb)
|
|
{
|
|
// boost::regex_search() only works on char or wchar_t
|
|
// types, but wchar_t is only 2-bytes on Win32 (not 4).
|
|
// So we use UTF-8 to make this work the same everywhere.
|
|
std::string utf8_text = wstring_to_utf8str(text);
|
|
if (findUrl(utf8_text, match, cb))
|
|
{
|
|
// we cannot blindly return the start/end offsets from
|
|
// the UTF-8 string because it is a variable-length
|
|
// character encoding, so we need to update the start
|
|
// and end values to be correct for the wide string.
|
|
LLWString wurl = utf8str_to_wstring(match.getUrl());
|
|
S32 start = text.find(wurl);
|
|
if (start == std::string::npos)
|
|
{
|
|
return false;
|
|
}
|
|
S32 end = start + wurl.size() - 1;
|
|
|
|
match.setValues(start, end, match.getUrl(),
|
|
match.getLabel(),
|
|
match.getTooltip(),
|
|
match.getIcon(),
|
|
//match.getStyle(),
|
|
match.getMenuName(),
|
|
match.getLocation(),
|
|
match.getID(),
|
|
match.underlineOnHoverOnly());
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool LLUrlRegistry::hasUrl(const std::string &text)
|
|
{
|
|
LLUrlMatch match;
|
|
return findUrl(text, match);
|
|
}
|
|
|
|
bool LLUrlRegistry::hasUrl(const LLWString &text)
|
|
{
|
|
LLUrlMatch match;
|
|
return findUrl(text, match);
|
|
}
|
|
|
|
bool LLUrlRegistry::isUrl(const std::string &text)
|
|
{
|
|
LLUrlMatch match;
|
|
if (findUrl(text, match))
|
|
{
|
|
return (match.getStart() == 0 && match.getEnd() >= text.size()-1);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool LLUrlRegistry::isUrl(const LLWString &text)
|
|
{
|
|
LLUrlMatch match;
|
|
if (findUrl(text, match))
|
|
{
|
|
return (match.getStart() == 0 && match.getEnd() >= text.size()-1);
|
|
}
|
|
return false;
|
|
}
|