diff options
Diffstat (limited to 'indra/llui/llurlregistry.cpp')
-rw-r--r-- | indra/llui/llurlregistry.cpp | 254 |
1 files changed, 254 insertions, 0 deletions
diff --git a/indra/llui/llurlregistry.cpp b/indra/llui/llurlregistry.cpp new file mode 100644 index 0000000000..9d215cf7ef --- /dev/null +++ b/indra/llui/llurlregistry.cpp @@ -0,0 +1,254 @@ +/** + * @file llurlregistry.cpp + * @author Martin Reddy + * @brief Contains a set of Url types that can be matched in a string + * + * $LicenseInfo:firstyear=2009&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#include "linden_common.h" +#include "llurlregistry.h" + +#include <boost/regex.hpp> + +// default dummy callback that ignores any label updates from the server +void LLUrlRegistryNullCallback(const std::string &url, const std::string &label) +{ +} + +LLUrlRegistry::LLUrlRegistry() +{ + // Urls are matched in the order that they were registered + registerUrl(new LLUrlEntryNoLink()); + registerUrl(new LLUrlEntryIcon()); + registerUrl(new LLUrlEntrySLURL()); + registerUrl(new LLUrlEntryHTTP()); + registerUrl(new LLUrlEntryHTTPLabel()); + registerUrl(new LLUrlEntryAgent()); + registerUrl(new LLUrlEntryGroup()); + registerUrl(new LLUrlEntryParcel()); + registerUrl(new LLUrlEntryTeleport()); + registerUrl(new LLUrlEntryWorldMap()); + registerUrl(new LLUrlEntryObjectIM()); + registerUrl(new LLUrlEntryPlace()); + registerUrl(new LLUrlEntryInventory()); + registerUrl(new LLUrlEntryObjectIM()); + //LLUrlEntrySL and LLUrlEntrySLLabel have more common pattern, + //so it should be registered in the end of list + registerUrl(new LLUrlEntrySL()); + registerUrl(new LLUrlEntrySLLabel()); + // most common pattern is a URL without any protocol, + // e.g., "secondlife.com" + registerUrl(new LLUrlEntryHTTPNoProtocol()); +} + +LLUrlRegistry::~LLUrlRegistry() +{ + // free all of the LLUrlEntryBase objects we are holding + std::vector<LLUrlEntryBase *>::iterator it; + for (it = mUrlEntry.begin(); it != mUrlEntry.end(); ++it) + { + delete *it; + } +} + +void LLUrlRegistry::registerUrl(LLUrlEntryBase *url) +{ + if (url) + { + mUrlEntry.push_back(url); + } +} + +static bool matchRegex(const char *text, boost::regex regex, U32 &start, U32 &end) +{ + boost::cmatch result; + bool found; + + // regex_search can potentially throw an exception, so check for it + try + { + found = boost::regex_search(text, result, regex); + } + catch (std::runtime_error &) + { + return false; + } + + if (! found) + { + return false; + } + + // return the first/last character offset for the matched substring + start = static_cast<U32>(result[0].first - text); + end = static_cast<U32>(result[0].second - text) - 1; + + // we allow certain punctuation to terminate a Url but not match it, + // e.g., "http://foo.com/." should just match "http://foo.com/" + if (text[end] == '.' || text[end] == ',') + { + end--; + } + // ignore a terminating ')' when Url contains no matching '(' + // see DEV-19842 for details + else if (text[end] == ')' && std::string(text+start, end-start).find('(') == std::string::npos) + { + end--; + } + + return true; +} + +static bool stringHasUrl(const std::string &text) +{ + // fast heuristic test for a URL in a string. This is used + // to avoid lots of costly regex calls, BUT it needs to be + // kept in sync with the LLUrlEntry regexes we support. + return (text.find("://") != std::string::npos || + text.find("www.") != std::string::npos || + text.find(".com") != std::string::npos || + text.find(".net") != std::string::npos || + text.find(".edu") != std::string::npos || + text.find(".org") != std::string::npos || + text.find("<nolink>") != std::string::npos || + text.find("<icon") != std::string::npos); +} + +bool LLUrlRegistry::findUrl(const std::string &text, LLUrlMatch &match, const LLUrlLabelCallback &cb) +{ + // avoid costly regexes if there is clearly no URL in the text + if (! stringHasUrl(text)) + { + return false; + } + + // find the first matching regex from all url entries in the registry + U32 match_start = 0, match_end = 0; + LLUrlEntryBase *match_entry = NULL; + + std::vector<LLUrlEntryBase *>::iterator it; + for (it = mUrlEntry.begin(); it != mUrlEntry.end(); ++it) + { + LLUrlEntryBase *url_entry = *it; + + U32 start = 0, end = 0; + if (matchRegex(text.c_str(), url_entry->getPattern(), start, end)) + { + // does this match occur in the string before any other match + if (start < match_start || match_entry == NULL) + { + match_start = start; + match_end = end; + match_entry = url_entry; + } + } + } + + // did we find a match? if so, return its details in the match object + if (match_entry) + { + // fill in the LLUrlMatch object and return it + std::string url = text.substr(match_start, match_end - match_start + 1); + match.setValues(match_start, match_end, + match_entry->getUrl(url), + match_entry->getLabel(url, cb), + match_entry->getTooltip(url), + match_entry->getIcon(url), + match_entry->getColor(), + match_entry->getMenuName(), + match_entry->getLocation(url), + match_entry->isLinkDisabled(), + match_entry->getID(url), + match_entry->underlineOnHoverOnly(url)); + return true; + } + + return false; +} + +bool LLUrlRegistry::findUrl(const LLWString &text, LLUrlMatch &match, const LLUrlLabelCallback &cb) +{ + // boost::regex_search() only works on char or wchar_t + // types, but wchar_t is only 2-bytes on Win32 (not 4). + // So we use UTF-8 to make this work the same everywhere. + std::string utf8_text = wstring_to_utf8str(text); + if (findUrl(utf8_text, match, cb)) + { + // we cannot blindly return the start/end offsets from + // the UTF-8 string because it is a variable-length + // character encoding, so we need to update the start + // and end values to be correct for the wide string. + LLWString wurl = utf8str_to_wstring(match.getUrl()); + S32 start = text.find(wurl); + if (start == std::string::npos) + { + return false; + } + S32 end = start + wurl.size() - 1; + + match.setValues(start, end, match.getUrl(), + match.getLabel(), + match.getTooltip(), + match.getIcon(), + match.getColor(), + match.getMenuName(), + match.getLocation(), + match.isLinkDisabled(), + match.getID(), + match.underlineOnHoverOnly()); + return true; + } + return false; +} + +bool LLUrlRegistry::hasUrl(const std::string &text) +{ + LLUrlMatch match; + return findUrl(text, match); +} + +bool LLUrlRegistry::hasUrl(const LLWString &text) +{ + LLUrlMatch match; + return findUrl(text, match); +} + +bool LLUrlRegistry::isUrl(const std::string &text) +{ + LLUrlMatch match; + if (findUrl(text, match)) + { + return (match.getStart() == 0 && match.getEnd() >= text.size()-1); + } + return false; +} + +bool LLUrlRegistry::isUrl(const LLWString &text) +{ + LLUrlMatch match; + if (findUrl(text, match)) + { + return (match.getStart() == 0 && match.getEnd() >= text.size()-1); + } + return false; +} |