/**
 * @file lluri.cpp
 * @author Phoenix
 * @date 2006-02-08
 * @brief Implementation of the LLURI class.
 *
 * $LicenseInfo:firstyear=2006&license=viewerlgpl$
 * Second Life Viewer Source Code
 * Copyright (C) 2010, Linden Research, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License only.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 * $/LicenseInfo$
 */

#include "linden_common.h"

#include "llapp.h"
#include "lluri.h"
#include "llsd.h"
#include <iomanip>

#include "lluuid.h"

// system includes
#include <algorithm>
#include <cstdlib>
#include <sstream>
#include <boost/tokenizer.hpp>
#include <boost/algorithm/string/find_iterator.hpp>
#include <boost/algorithm/string/finder.hpp>

/**
 * @brief Write one character to ostr as a percent-escape ("%XX").
 *
 * The two hex digits are upper case and zero padded. The stream's
 * uppercase/base/fill settings are put back to nouppercase/dec/' '
 * afterwards.
 */
// static
void LLURI::encodeCharacter(std::ostream& ostr, std::string::value_type val)
{
    ostr << "%"
         << std::uppercase
         << std::hex
         << std::setw(2)
         << std::setfill('0')
         // VWR-4010 Cannot cast to U32 because sign-extension on
         // chars > 128 will result in FFFFFFC3 instead of F3.
         << static_cast<S32>(static_cast<U8>(val))
         // reset stream state
         << std::nouppercase
         << std::dec
         << std::setfill(' ');
}

/**
 * @brief Percent-escape every character of str that is not in allowed.
 *
 * @param str               text to escape
 * @param allowed           characters copied through unchanged
 * @param is_allowed_sorted true when allowed is already sorted, which
 *                          lets the lookup use a binary search
 * @return the escaped copy of str
 */
// static
std::string LLURI::escape(
    const std::string& str,
    const std::string& allowed,
    bool is_allowed_sorted)
{
    // *NOTE: This size determination feels like a good value to
    // me. If someone wants to come up with a more precise heuristic
    // with some data to back up the assertion that 'sort is good'
    // then feel free to change this test a bit.
    if(!is_allowed_sorted && (str.size() > 2 * allowed.size()))
    {
        // The input is long relative to the allowed set, so sort a
        // copy of the set once and take the binary search path.
        std::string sorted_allowed(allowed);
        std::sort(sorted_allowed.begin(), sorted_allowed.end());
        return escape(str, sorted_allowed, true);
    }

    std::ostringstream ostr;
    const std::string::const_iterator end = str.end();
    for(std::string::const_iterator it = str.begin(); it != end; ++it)
    {
        const std::string::value_type c = *it;
        const bool is_allowed = is_allowed_sorted
            ? std::binary_search(allowed.begin(), allowed.end(), c)
            : (allowed.find(c) != std::string::npos);
        if(is_allowed)
        {
            ostr << c;
        }
        else
        {
            encodeCharacter(ostr, c);
        }
    }
    return ostr.str();
}

/**
 * @brief Decode percent-escapes in str.
 *
 * Handling of malformed input, kept as-is for compatibility:
 *  - '%' followed by a non-hex character: both are copied through.
 *  - '%' followed by one hex digit and then a non-hex character: the
 *    byte (digit << 4) is emitted, followed by the non-hex character.
 *  - input ending right after '%' or after "%X": decoding stops and
 *    the incomplete escape is dropped.
 */
// static
std::string LLURI::unescape(const std::string& str)
{
    std::ostringstream ostr;
    std::string::const_iterator it = str.begin();
    const std::string::const_iterator end = str.end();
    for(; it != end; ++it)
    {
        if((*it) != '%')
        {
            ostr.put(*it);
            continue;
        }

        ++it;
        if(it == end) break;

        if(!is_char_hex(*it))
        {
            ostr.put('%');
            ostr.put(*it);
            continue;
        }

        // high nybble
        U8 c = hex_as_nybble(*it++);
        c = c << 4;
        if(it == end) break;

        if(is_char_hex(*it))
        {
            // low nybble completes the byte
            c |= hex_as_nybble(*it);
            ostr.put(static_cast<char>(c));
        }
        else
        {
            ostr.put(static_cast<char>(c));
            ostr.put(*it);
        }
    }
    return ostr.str();
}

namespace
{
    // Character sets used by the escape helpers below. Each is built
    // once and handed out by reference.

    const std::string& unreserved()
    {
        static const std::string s =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
            "0123456789"
            "-._~";
        return s;
    }

    const std::string& path()
    {
        static const std::string s =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "abcdefghijklmnopqrstuvwxyz"
            "0123456789"
            "$-_.+"
            "!*'(),"
            "{}|\\^~[]`"
            "<>#%"
            ";/?:@&=";
        return s;
    }

    const std::string& sub_delims()
    {
        static const std::string s = "!$&'()*+,;=";
        return s;
    }

    // Sorted copy of unreserved(), suitable for escape()'s binary search.
    std::string sortedUnreserved()
    {
        std::string s(unreserved());
        std::sort(s.begin(), s.end());
        return s;
    }

    std::string escapeHostAndPort(const std::string& s)
        { return LLURI::escape(s, unreserved() + sub_delims() + ":"); }
    std::string escapePathComponent(const std::string& s)
        { return LLURI::escape(s, unreserved() + sub_delims() + ":@"); }
    std::string escapeQueryVariable(const std::string& s)
        { return LLURI::escape(s, unreserved() + ":@!$'()*+,"); }    // sub_delims - "&;=" + ":@"
    std::string escapeQueryValue(const std::string& s)
        { return LLURI::escape(s, unreserved() + ":@!$'()*+,="); }   // sub_delims - "&;" + ":@"
    std::string escapeUriQuery(const std::string& s)
        { return LLURI::escape(s, unreserved() + ":@?&$;*+=%/"); }
    std::string escapeUriData(const std::string& s)
        { return LLURI::escape(s, unreserved() + "%"); }
    std::string escapeUriPath(const std::string& s)
        { return LLURI::escape(s, path()); }
}

/**
 * @brief Escape str, leaving only the unreserved characters
 * (letters, digits and "-._~") unescaped.
 */
//static
std::string LLURI::escape(const std::string& str)
{
    // Sorted once, on first use, by the static initializer; this
    // replaces a separate 'initialized' flag.
    static const std::string default_allowed = sortedUnreserved();
    return escape(str, default_allowed, true);
}

/**
 * @brief Escape a string that is either a "data:" URI or a path with
 * an optional query and fragment.
 *
 * - "data:" with a ',' separator: the header (up to and including the
 *   ',') is kept verbatim; the data is escaped unless the header
 *   mentions "base64", in which case the whole string is returned
 *   unchanged.
 * - otherwise, if a '?' (or, failing that, a ';') occurs before any
 *   '#': the part up to and including that delimiter and the fragment
 *   are escaped as path, the part in between as query.
 * - anything else is escaped as a path.
 */
//static
std::string LLURI::escapePathAndData(const std::string &str)
{
    std::string result;

    const std::string data_marker = "data:";
    if (str.compare(0, data_marker.length(), data_marker) == 0)
    {
        // This is not url, but data, data part needs to be properly escaped
        // data part is separated by ',' from header. Minimal data uri is "data:,"
        // See "data URI scheme"
        const size_t separator = str.find(',');
        if (separator != std::string::npos)
        {
            const size_t header_size = separator + 1;
            const std::string header = str.substr(0, header_size);
            // base64 is url-safe
            if (header.find("base64") != std::string::npos)
            {
                // assume url-safe data
                result = str;
            }
            else
            {
                const std::string data = str.substr(header_size);

                // Notes: File can be partially pre-escaped, that's why escaping ignores '%'
                // It somewhat limits user from displaying strings like "%20" in text
                // but that's how viewer worked for a while and user can double-escape it

                // Header doesn't need escaping
                result = header + escapeUriData(data);
            }
        }
    }
    else
    {
        // try processing it as path with query separator
        // The query component is indicated by the first question
        // mark("?") character and terminated by a number sign("#")
        const size_t fragment_pos = str.find('#');

        // A delimiter only starts a query when it sits before the
        // fragment. npos is the largest size_t, so the comparisons
        // below also cover "delimiter not found" and "no fragment".
        size_t delim_pos = str.find('?');
        if (delim_pos >= fragment_pos)
        {
            // alternate separator
            delim_pos = str.find(';');
        }

        if (delim_pos < fragment_pos)
        {
            const size_t path_size = delim_pos + 1;
            std::string query;
            std::string fragment;

            if (fragment_pos != std::string::npos)
            {
                query = str.substr(path_size, fragment_pos - path_size);
                fragment = str.substr(fragment_pos);
            }
            else
            {
                query = str.substr(path_size);
            }

            const std::string path_part = str.substr(0, path_size);

            result = escapeUriPath(path_part) + escapeUriQuery(query) + escapeUriPath(fragment);
        }
    }

    if (result.empty())
    {
        // Not a known scheme or no data part, try just escaping as Uri path
        result = escapeUriPath(str);
    }
    return result;
}

LLURI::LLURI()
{
}

/**
 * @brief Parse an already-escaped URI string.
 *
 * Everything before the first ':' becomes the scheme and the rest the
 * opaque part; without a ':' the scheme is empty and the whole string
 * is the opaque part. Authority, path and query are then split out of
 * the opaque part.
 */
LLURI::LLURI(const std::string& escaped_str)
{
    const std::string::size_type scheme_end = escaped_str.find(':');
    if (scheme_end == std::string::npos)
    {
        mScheme = "";
        mEscapedOpaque = escaped_str;
    }
    else
    {
        mScheme = escaped_str.substr(0, scheme_end);
        mEscapedOpaque = escaped_str.substr(scheme_end + 1);
    }

    parseAuthorityAndPathUsingOpaque();

    // The path may still carry the query; break it out.
    const std::string::size_type query_pos = mEscapedPath.find('?');
    if (query_pos != std::string::npos)
    {
        mEscapedQuery = mEscapedPath.substr(query_pos + 1);
        mEscapedPath = mEscapedPath.substr(0, query_pos);
    }
}

// True when port is the well-known port for scheme (http, https, ftp).
static BOOL isDefault(const std::string& scheme, U16 port)
{
    if (scheme == "http")
        return port == 80;
    if (scheme == "https")
        return port == 443;
    if (scheme == "ftp")
        return port == 21;

    return FALSE;
}

/**
 * @brief Fill mEscapedAuthority and mEscapedPath from mEscapedOpaque.
 *
 * For the hierarchical schemes listed below the opaque part must start
 * with "//"; the authority runs up to the first '/' or '?' and the
 * path is everything from that character on (so it may still contain
 * the query, which the caller breaks out). For "about" the whole
 * opaque part is the path. Other schemes are left untouched.
 */
void LLURI::parseAuthorityAndPathUsingOpaque()
{
    if (mScheme == "http" || mScheme == "https" ||
        mScheme == "ftp" || mScheme == "secondlife" ||
        mScheme == "x-grid-location-info")
    {
        if (mEscapedOpaque.compare(0, 2, "//") != 0)
        {
            return;
        }

        // First '/' (path) or '?' (query) after the leading "//".
        const std::string::size_type authority_end =
            mEscapedOpaque.find_first_of("/?", 2);
        if (authority_end == std::string::npos)
        {
            // no path, no query
            mEscapedAuthority = mEscapedOpaque.substr(2);
            mEscapedPath = "";
        }
        else
        {
            mEscapedAuthority = mEscapedOpaque.substr(2, authority_end - 2);
            // query part will be broken out later
            mEscapedPath = mEscapedOpaque.substr(authority_end);
        }
    }
    else if (mScheme == "about")
    {
        mEscapedPath = mEscapedOpaque;
    }
}

/**
 * @brief Build a URI from its components.
 *
 * userName and password are escaped here; hostName, escapedPath and
 * escapedQuery are used verbatim. The port is only written into the
 * authority when it is not the default for the scheme.
 *
 * NOTE(review): escapedQuery is appended to the opaque part directly
 * after escapedPath with no '?' inserted here -- confirm what callers
 * pass.
 */
LLURI::LLURI(const std::string& scheme,
             const std::string& userName,
             const std::string& password,
             const std::string& hostName,
             U16 port,
             const std::string& escapedPath,
             const std::string& escapedQuery)
    : mScheme(scheme),
      mEscapedPath(escapedPath),
      mEscapedQuery(escapedQuery)
{
    std::ostringstream auth;
    std::ostringstream opaque;

    opaque << "//";

    if (!userName.empty())
    {
        auth << escape(userName);
        if (!password.empty())
        {
            auth << ':' << escape(password);
        }
        auth << '@';
    }
    auth << hostName;
    if (!isDefault(scheme, port))
    {
        auth << ':' << port;
    }
    mEscapedAuthority = auth.str();

    opaque << mEscapedAuthority << escapedPath << escapedQuery;

    mEscapedOpaque = opaque.str();
}

LLURI::~LLURI()
{
}

/**
 * @brief Build a URI from a prefix and a path.
 *
 * @param prefix either a full URI prefix (contains "://") or just a
 *               host with optional port, in which case the scheme is
 *               "http"
 * @param path   array of path components, a '/'-separated string, or
 *               undefined; each component is escaped individually
 */
// static
LLURI LLURI::buildHTTP(const std::string& prefix,
                       const LLSD& path)
{
    LLURI result;

    // TODO: deal with '/' '?' '#' in host_port
    if (prefix.find("://") != prefix.npos)
    {
        // it is a prefix
        result = LLURI(prefix);
    }
    else
    {
        // it is just a host and optional port
        result.mScheme = "http";
        result.mEscapedAuthority = escapeHostAndPort(prefix);
    }

    if (path.isArray())
    {
        // break out and escape each path component
        for (LLSD::array_const_iterator it = path.beginArray();
             it != path.endArray();
             ++it)
        {
            LL_DEBUGS() << "PATH: inserting " << it->asString() << LL_ENDL;
            result.mEscapedPath += "/" + escapePathComponent(it->asString());
        }
    }
    else if (path.isString())
    {
        std::string pathstr(path);
        // Trailing slash is significant in HTTP land. If caller specified,
        // make a point of preserving.
        std::string last_slash;
        std::string::size_type len(pathstr.length());
        if (len && pathstr[len-1] == '/')
        {
            last_slash = "/";
        }

        // Escape every individual path component, recombining with slashes.
        for (boost::split_iterator<std::string::const_iterator>
                 ti(pathstr, boost::first_finder("/")), tend;
             ti != tend; ++ti)
        {
            // Eliminate a leading slash or duplicate slashes anywhere. (Extra
            // slashes show up here as empty components.) This test also
            // eliminates a trailing slash, hence last_slash above.
            if (! ti->empty())
            {
                result.mEscapedPath
                    += "/" + escapePathComponent(std::string(ti->begin(), ti->end()));
            }
        }

        // Reinstate trailing slash, if any.
        result.mEscapedPath += last_slash;
    }
    else if (path.isUndefined())
    {
        // do nothing
    }
    else
    {
        LL_WARNS() << "Valid path arguments to buildHTTP are array, string, or undef, you passed type"
                   << path.type() << LL_ENDL;
    }
    result.mEscapedOpaque = "//" + result.mEscapedAuthority +
        result.mEscapedPath;
    return result;
}

/**
 * @brief Like buildHTTP(prefix, path), with a query built from the
 * map in query via mapToQueryString().
 */
// static
LLURI LLURI::buildHTTP(const std::string& prefix,
                       const LLSD& path,
                       const LLSD& query)
{
    LLURI uri = buildHTTP(prefix, path);
    // break out and escape each query component
    uri.mEscapedQuery = mapToQueryString(query);
    uri.mEscapedOpaque += uri.mEscapedQuery;
    uri.mEscapedQuery.erase(0,1); // trim the leading '?'
    return uri;
}

// static
LLURI LLURI::buildHTTP(const std::string& host,
                       const U32& port,
                       const LLSD& path)
{
    return LLURI::buildHTTP(llformat("%s:%u", host.c_str(), port), path);
}

// static
LLURI LLURI::buildHTTP(const std::string& host,
                       const U32& port,
                       const LLSD& path,
                       const LLSD& query)
{
    return LLURI::buildHTTP(llformat("%s:%u", host.c_str(), port), path, query);
}

// The full (still escaped) URI: "scheme:opaque", or just the opaque
// part when there is no scheme.
std::string LLURI::asString() const
{
    if (mScheme.empty())
    {
        return mEscapedOpaque;
    }
    else
    {
        return mScheme + ":" + mEscapedOpaque;
    }
}

std::string LLURI::scheme() const
{
    return mScheme;
}

std::string LLURI::opaque() const
{
    return unescape(mEscapedOpaque);
}

std::string LLURI::authority() const
{
    return unescape(mEscapedAuthority);
}


namespace
{
    /**
     * Split "user@host:port" into its parts. user is everything before
     * the first '@' (it may itself be "name:password"); port is
     * everything after the first ':' that follows the '@'. Missing
     * parts come back as empty strings.
     */
    void findAuthorityParts(const std::string& authority,
                            std::string& user,
                            std::string& host,
                            std::string& port)
    {
        std::string::size_type start_pos = authority.find('@');
        if (start_pos == std::string::npos)
        {
            user = "";
            start_pos = 0;
        }
        else
        {
            user = authority.substr(0, start_pos);
            start_pos += 1;
        }

        std::string::size_type end_pos = authority.find(':', start_pos);
        if (end_pos == std::string::npos)
        {
            host = authority.substr(start_pos);
            port = "";
        }
        else
        {
            host = authority.substr(start_pos, end_pos - start_pos);
            port = authority.substr(end_pos + 1);
        }
    }
}

std::string LLURI::hostName() const
{
    std::string user, host, port;
    findAuthorityParts(mEscapedAuthority, user, host, port);
    return unescape(host);
}

/**
 * @brief The unescaped user name from the authority, or "" if none.
 *
 * The user info is either "name" or "name:password"; the name is the
 * part before the optional ':'.
 */
std::string LLURI::userName() const
{
    std::string userPass, host, port;
    findAuthorityParts(mEscapedAuthority, userPass, host, port);
    // substr(0, npos) yields the whole string, so a user name without
    // a password ("user@host") is returned rather than dropped.
    const std::string user = userPass.substr(0, userPass.find(':'));
    return unescape(user);
}

/**
 * @brief The unescaped password from the authority, or "" when the
 * user info has no ':'.
 */
std::string LLURI::password() const
{
    std::string pass, userPass, host, port;
    findAuthorityParts(mEscapedAuthority, userPass, host, port);
    std::string::size_type pos = userPass.find(':');
    if (pos != std::string::npos)
    {
        pass = userPass.substr(pos + 1);
    }
    return unescape(pass);
}

BOOL LLURI::defaultPort() const
{
    return isDefault(mScheme, hostPort());
}

/**
 * @brief The port from the authority; when none is given, the default
 * for http/https/ftp, or 0 for any other scheme.
 */
U16 LLURI::hostPort() const
{
    std::string user, host, port;
    findAuthorityParts(mEscapedAuthority, user, host, port);
    if (port.empty())
    {
        if (mScheme == "http")
            return 80;
        if (mScheme == "https")
            return 443;
        if (mScheme == "ftp")
            return 21;
        return 0;
    }
    return static_cast<U16>(atoi(port.c_str()));
}

std::string LLURI::path() const
{
    return unescape(mEscapedPath);
}

/**
 * @brief The path split on '/' into an LLSD array, empty components
 * dropped. The tokens are taken from mEscapedPath as stored; they are
 * not unescaped here.
 */
LLSD LLURI::pathArray() const
{
    typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
    boost::char_separator<char> sep("/", "", boost::drop_empty_tokens);
    tokenizer tokens(mEscapedPath, sep);

    LLSD params;
    for (tokenizer::iterator it = tokens.begin(), end = tokens.end();
         it != end; ++it)
    {
        params.append(*it);
    }
    return params;
}

std::string LLURI::query() const
{
    return unescape(mEscapedQuery);
}

LLSD LLURI::queryMap() const
{
    return queryMap(mEscapedQuery);
}

/**
 * @brief Parse an escaped "k1=v1&k2=v2" query string.
 *
 * Tuples are separated by '&'; empty tuples are skipped. "key=value"
 * stores the unescaped value under the unescaped key; a tuple without
 * '=' stores true under the unescaped tuple.
 *
 * NOTE(review): the result starts out as LLSD::emptyArray() and is
 * then indexed by string key; that start value is what an empty query
 * returns -- confirm against callers before changing it.
 */
// static
LLSD LLURI::queryMap(std::string escaped_query_string)
{
    LL_DEBUGS() << "LLURI::queryMap query params: " << escaped_query_string << LL_ENDL;

    LLSD result = LLSD::emptyArray();
    const std::string::size_type len = escaped_query_string.size();
    std::string::size_type pos = 0;
    while (pos < len)
    {
        // get tuple first
        std::string::size_type tuple_end = escaped_query_string.find('&', pos);
        if (tuple_end == std::string::npos)
        {
            tuple_end = len;
        }
        const std::string tuple = escaped_query_string.substr(pos, tuple_end - pos);
        pos = tuple_end + 1;
        if (tuple.empty()) continue;

        // parse tuple
        const std::string::size_type key_end = tuple.find('=');
        if (key_end != std::string::npos)
        {
            std::string key = unescape(tuple.substr(0,key_end));
            std::string value = unescape(tuple.substr(key_end+1));
            LL_DEBUGS() << "inserting key " << key << " value " << value << LL_ENDL;
            result[key] = value;
        }
        else
        {
            LL_DEBUGS() << "inserting key " << unescape(tuple) << " value true" << LL_ENDL;
            result[unescape(tuple)] = true;
        }
    }
    return result;
}

/**
 * @brief Serialize an LLSD map as "?k1=v1&k2=v2".
 *
 * Keys and values are escaped; an entry whose value is undefined is
 * written as the bare key. Returns "" when queryMap is not a map or
 * has no entries.
 */
std::string LLURI::mapToQueryString(const LLSD& queryMap)
{
    std::string query_string;
    if (queryMap.isMap())
    {
        bool first_element = true;
        LLSD::map_const_iterator iter = queryMap.beginMap();
        LLSD::map_const_iterator end = queryMap.endMap();
        std::ostringstream ostr;
        for (; iter != end; ++iter)
        {
            if (first_element)
            {
                ostr << "?";
                first_element = false;
            }
            else
            {
                ostr << "&";
            }
            ostr << escapeQueryVariable(iter->first);
            if (iter->second.isDefined())
            {
                ostr << "=" << escapeQueryValue(iter->second.asString());
            }
        }
        query_string = ostr.str();
    }
    return query_string;
}

bool operator!=(const LLURI& first, const LLURI& second)
{
    return (first.asString() != second.asString());
}