summaryrefslogtreecommitdiff
path: root/indra
diff options
context:
space:
mode:
authorMonty Brandenberg <monty@lindenlab.com>2012-06-08 20:21:54 -0400
committerMonty Brandenberg <monty@lindenlab.com>2012-06-08 20:21:54 -0400
commit28a04400b4160dd34166483ddcf0c12637bcc363 (patch)
treee43ca202df6ed9bc8cbcc4a97d354e4798b7aa62 /indra
parent1e3d05329f2e823191c7c91926bee5ec9e5dc4d7 (diff)
Implemented HTTP retry for requests. Went in rather easily which
surprised me. Added a retry queue similar to ready queue to the policy object which is sorted by retry time. Currently do five retries (after the initial try) delayed by .25, .5, 1, 2 and 5 seconds. Removed the retry logic from the lltexturefetch module. Upped the waiting time in the unit test for the retries. People won't like this but tough, need tests.
Diffstat (limited to 'indra')
-rw-r--r--indra/llcorehttp/_httplibcurl.cpp58
-rw-r--r--indra/llcorehttp/_httplibcurl.h4
-rw-r--r--indra/llcorehttp/_httpoprequest.cpp51
-rw-r--r--indra/llcorehttp/_httpoprequest.h5
-rw-r--r--indra/llcorehttp/_httppolicy.cpp98
-rw-r--r--indra/llcorehttp/_httppolicy.h19
-rw-r--r--indra/llcorehttp/_httpreadyqueue.h2
-rw-r--r--indra/llcorehttp/_httpretryqueue.h94
-rw-r--r--indra/llcorehttp/httpcommon.h12
-rw-r--r--indra/llcorehttp/tests/test_httprequest.hpp2
-rw-r--r--indra/newview/lltexturefetch.cpp145
-rw-r--r--indra/newview/lltexturefetch.h6
12 files changed, 402 insertions, 94 deletions
diff --git a/indra/llcorehttp/_httplibcurl.cpp b/indra/llcorehttp/_httplibcurl.cpp
index 5272c391e8..05b2c2be69 100644
--- a/indra/llcorehttp/_httplibcurl.cpp
+++ b/indra/llcorehttp/_httplibcurl.cpp
@@ -29,6 +29,7 @@
#include "httpheaders.h"
#include "bufferarray.h"
#include "_httpoprequest.h"
+#include "_httppolicy.h"
namespace LLCore
@@ -85,6 +86,8 @@ void HttpLibcurl::term()
HttpService::ELoopSpeed HttpLibcurl::processTransport()
{
+ HttpService::ELoopSpeed ret(HttpService::REQUEST_SLEEP);
+
// Give libcurl some cycles to do I/O & callbacks
for (int policy_class(0); policy_class < HttpRequest::POLICY_CLASS_LIMIT; ++policy_class)
{
@@ -110,7 +113,8 @@ HttpService::ELoopSpeed HttpLibcurl::processTransport()
CURL * handle(msg->easy_handle);
CURLcode result(msg->data.result);
- completeRequest(mMultiHandles[policy_class], handle, result);
+ HttpService::ELoopSpeed speed(completeRequest(mMultiHandles[policy_class], handle, result));
+ ret = (std::min)(ret, speed);
handle = NULL; // No longer valid on return
}
else if (CURLMSG_NONE == msg->msg)
@@ -127,7 +131,11 @@ HttpService::ELoopSpeed HttpLibcurl::processTransport()
}
}
- return mActiveOps.empty() ? HttpService::REQUEST_SLEEP : HttpService::NORMAL;
+ if (! mActiveOps.empty())
+ {
+ ret = (std::min)(ret, HttpService::NORMAL);
+ }
+ return ret;
}
@@ -153,8 +161,12 @@ void HttpLibcurl::addOp(HttpOpRequest * op)
}
-void HttpLibcurl::completeRequest(CURLM * multi_handle, CURL * handle, CURLcode status)
+HttpService::ELoopSpeed HttpLibcurl::completeRequest(CURLM * multi_handle, CURL * handle, CURLcode status)
{
+ static const HttpStatus cant_connect(HttpStatus::EXT_CURL_EASY, CURLE_COULDNT_CONNECT);
+ static const HttpStatus cant_res_proxy(HttpStatus::EXT_CURL_EASY, CURLE_COULDNT_RESOLVE_PROXY);
+ static const HttpStatus cant_res_host(HttpStatus::EXT_CURL_EASY, CURLE_COULDNT_RESOLVE_HOST);
+
HttpOpRequest * op(NULL);
curl_easy_getinfo(handle, CURLINFO_PRIVATE, &op);
// *FIXME: check the pointer
@@ -190,10 +202,7 @@ void HttpLibcurl::completeRequest(CURLM * multi_handle, CURL * handle, CURLcode
int http_status(200);
curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &http_status);
- op->mStatus = LLCore::HttpStatus(http_status,
- (http_status >= 200 && http_status <= 299
- ? HE_SUCCESS
- : HE_REPLY_ERROR));
+ op->mStatus = LLCore::HttpStatus(http_status);
}
// Detach from multi and recycle handle
@@ -201,9 +210,42 @@ void HttpLibcurl::completeRequest(CURLM * multi_handle, CURL * handle, CURLcode
curl_easy_cleanup(handle);
op->mCurlHandle = NULL;
- // Deliver to reply queue and release
+ // Retry or finalize
+ if (! op->mStatus)
+ {
+ // If this failed, we might want to retry. Have to inspect
+ // the status a little more deeply for those reasons worth retrying...
+ if (op->mPolicyRetries < op->mPolicyRetryLimit &&
+ ((op->mStatus.isHttpStatus() && op->mStatus.mType >= 499 && op->mStatus.mType <= 599) ||
+ cant_connect == op->mStatus ||
+ cant_res_proxy == op->mStatus ||
+ cant_res_host == op->mStatus))
+ {
+ // Okay, worth a retry. We include 499 in this test as
+ // it's the old 'who knows?' error from many grid services...
+ HttpPolicy & policy(mService->getPolicy());
+
+ policy.retryOp(op);
+ return HttpService::NORMAL; // Having pushed to retry, keep things running
+ }
+ }
+
+ // This op is done, finalize it delivering it to the reply queue...
+ if (! op->mStatus)
+ {
+ LL_WARNS("CoreHttp") << "URL op failed after " << op->mPolicyRetries
+ << " retries. Reason: " << op->mStatus.toString()
+ << LL_ENDL;
+ }
+ else if (op->mPolicyRetries)
+ {
+ LL_WARNS("CoreHttp") << "URL op succeeded after " << op->mPolicyRetries << " retries."
+ << LL_ENDL;
+ }
+
op->stageFromActive(mService);
op->release();
+ return HttpService::REQUEST_SLEEP;
}
diff --git a/indra/llcorehttp/_httplibcurl.h b/indra/llcorehttp/_httplibcurl.h
index ec325c1946..fe628b9ab0 100644
--- a/indra/llcorehttp/_httplibcurl.h
+++ b/indra/llcorehttp/_httplibcurl.h
@@ -83,7 +83,9 @@ public:
protected:
/// Invoked when libcurl has indicated a request has been processed
/// to completion and we need to move the request to a new state.
- void completeRequest(CURLM * multi_handle, CURL * handle, CURLcode status);
+ HttpService::ELoopSpeed completeRequest(CURLM * multi_handle,
+ CURL * handle,
+ CURLcode status);
protected:
typedef std::set<HttpOpRequest *> active_set_t;
diff --git a/indra/llcorehttp/_httpoprequest.cpp b/indra/llcorehttp/_httpoprequest.cpp
index 4bdc4a5257..895629c514 100644
--- a/indra/llcorehttp/_httpoprequest.cpp
+++ b/indra/llcorehttp/_httpoprequest.cpp
@@ -99,8 +99,15 @@ HttpOpRequest::HttpOpRequest()
mReplyBody(NULL),
mReplyOffset(0),
mReplyLength(0),
- mReplyHeaders(NULL)
-{}
+ mReplyHeaders(NULL),
+ mPolicyRetries(0),
+ mPolicyRetryAt(HttpTime(0)),
+ mPolicyRetryLimit(5) // *FIXME: Get from policy definitions
+{
+ // *NOTE: As members are added, retry initialization/cleanup
+ // may need to be extended in @prepareRequest().
+}
+
HttpOpRequest::~HttpOpRequest()
@@ -130,7 +137,6 @@ HttpOpRequest::~HttpOpRequest()
}
mCurlService = NULL;
-
if (mCurlHeaders)
{
@@ -313,6 +319,30 @@ HttpStatus HttpOpRequest::setupPost(HttpRequest::policy_t policy_id,
HttpStatus HttpOpRequest::prepareRequest(HttpService * service)
{
+ // Scrub transport and result data for retried op case
+ mCurlActive = false;
+ mCurlHandle = NULL;
+ mCurlService = NULL;
+ if (mCurlHeaders)
+ {
+ curl_slist_free_all(mCurlHeaders);
+ mCurlHeaders = NULL;
+ }
+ mCurlBodyPos = 0;
+
+ if (mReplyBody)
+ {
+ mReplyBody->release();
+ mReplyBody = NULL;
+ }
+ mReplyOffset = 0;
+ mReplyLength = 0;
+ if (mReplyHeaders)
+ {
+ mReplyHeaders->release();
+ mReplyHeaders = NULL;
+ }
+
// *FIXME: better error handling later
HttpStatus status;
@@ -321,6 +351,7 @@ HttpStatus HttpOpRequest::prepareRequest(HttpService * service)
mCurlHandle = curl_easy_init();
// curl_easy_setopt(mCurlHandle, CURLOPT_VERBOSE, 1);
+ curl_easy_setopt(mCurlHandle, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
curl_easy_setopt(mCurlHandle, CURLOPT_TIMEOUT, 30);
curl_easy_setopt(mCurlHandle, CURLOPT_CONNECTTIMEOUT, 30);
curl_easy_setopt(mCurlHandle, CURLOPT_NOSIGNAL, 1);
@@ -403,12 +434,7 @@ HttpStatus HttpOpRequest::prepareRequest(HttpService * service)
break;
}
- if (mReqHeaders)
- {
- mCurlHeaders = append_headers_to_slist(mReqHeaders, mCurlHeaders);
- }
- mCurlHeaders = curl_slist_append(mCurlHeaders, "Pragma:");
-
+ // There's a CURLOPT for this now...
if ((mReqOffset || mReqLength) && HOR_GET == mReqMethod)
{
static const char * const fmt1("Range: bytes=%lu-%lu");
@@ -428,6 +454,13 @@ HttpStatus HttpOpRequest::prepareRequest(HttpService * service)
range_line[sizeof(range_line) - 1] = '\0';
mCurlHeaders = curl_slist_append(mCurlHeaders, range_line);
}
+
+ mCurlHeaders = curl_slist_append(mCurlHeaders, "Pragma:");
+ if (mReqHeaders)
+ {
+ // Caller's headers last to override
+ mCurlHeaders = append_headers_to_slist(mReqHeaders, mCurlHeaders);
+ }
curl_easy_setopt(mCurlHandle, CURLOPT_HTTPHEADER, mCurlHeaders);
if (mProcFlags & (PF_SCAN_RANGE_HEADER | PF_SAVE_HEADERS))
diff --git a/indra/llcorehttp/_httpoprequest.h b/indra/llcorehttp/_httpoprequest.h
index 0cad4e8459..6dcf30ca0c 100644
--- a/indra/llcorehttp/_httpoprequest.h
+++ b/indra/llcorehttp/_httpoprequest.h
@@ -128,6 +128,11 @@ public:
off_t mReplyOffset;
size_t mReplyLength;
HttpHeaders * mReplyHeaders;
+
+ // Policy data
+ int mPolicyRetries;
+ HttpTime mPolicyRetryAt;
+ const int mPolicyRetryLimit;
}; // end class HttpOpRequest
diff --git a/indra/llcorehttp/_httppolicy.cpp b/indra/llcorehttp/_httppolicy.cpp
index 51f5e487dc..1f4cd34a4b 100644
--- a/indra/llcorehttp/_httppolicy.cpp
+++ b/indra/llcorehttp/_httppolicy.cpp
@@ -24,39 +24,46 @@
* $/LicenseInfo$
*/
+#include "linden_common.h"
+
#include "_httppolicy.h"
#include "_httpoprequest.h"
#include "_httpservice.h"
#include "_httplibcurl.h"
+#include "lltimer.h"
+
namespace LLCore
{
HttpPolicy::HttpPolicy(HttpService * service)
: mService(service)
-{
- for (int policy_class(0); policy_class < HttpRequest::POLICY_CLASS_LIMIT; ++policy_class)
- {
- mReadyInClass[policy_class] = 0;
- }
-}
+{}
HttpPolicy::~HttpPolicy()
{
- for (int policy_class(0); policy_class < HttpRequest::POLICY_CLASS_LIMIT; ++policy_class)
+ for (int policy_class(0); policy_class < LL_ARRAY_SIZE(mState); ++policy_class)
{
- HttpReadyQueue & readyq(mReadyQueue[policy_class]);
+ HttpRetryQueue & retryq(mState[policy_class].mRetryQueue);
+ while (! retryq.empty())
+ {
+ HttpOpRequest * op(retryq.top());
+ op->cancel();
+ op->release();
+ retryq.pop();
+ }
+
+ HttpReadyQueue & readyq(mState[policy_class].mReadyQueue);
while (! readyq.empty())
{
HttpOpRequest * op(readyq.top());
op->cancel();
op->release();
- mReadyInClass[policy_class]--;
readyq.pop();
}
}
@@ -68,27 +75,69 @@ void HttpPolicy::addOp(HttpOpRequest * op)
{
const int policy_class(op->mReqPolicy);
- mReadyQueue[policy_class].push(op);
- ++mReadyInClass[policy_class];
+ op->mPolicyRetries = 0;
+ mState[policy_class].mReadyQueue.push(op);
+}
+
+
+void HttpPolicy::retryOp(HttpOpRequest * op)
+{
+ static const HttpTime retry_deltas[] =
+ {
+ 250000, // 1st retry in 0.25 S, etc...
+ 500000,
+ 1000000,
+ 2000000,
+ 5000000 // ... to every 5.0 S.
+ };
+ static const int delta_max(int(LL_ARRAY_SIZE(retry_deltas)) - 1);
+
+ const HttpTime now(totalTime());
+ const int policy_class(op->mReqPolicy);
+
+ const HttpTime delta(retry_deltas[llclamp(op->mPolicyRetries, 0, delta_max)]);
+ op->mPolicyRetryAt = now + delta;
+ ++op->mPolicyRetries;
+ LL_WARNS("CoreHttp") << "URL op retry #" << op->mPolicyRetries
+ << " being scheduled for " << delta << " uSecs from now."
+ << LL_ENDL;
+ mState[policy_class].mRetryQueue.push(op);
}
HttpService::ELoopSpeed HttpPolicy::processReadyQueue()
{
+ const HttpTime now(totalTime());
HttpService::ELoopSpeed result(HttpService::REQUEST_SLEEP);
HttpLibcurl & transport(mService->getTransport());
- for (int policy_class(0); policy_class < HttpRequest::POLICY_CLASS_LIMIT; ++policy_class)
+ for (int policy_class(0); policy_class < LL_ARRAY_SIZE(mState); ++policy_class)
{
- HttpReadyQueue & readyq(mReadyQueue[policy_class]);
int active(transport.getActiveCountInClass(policy_class));
int needed(8 - active);
- if (needed > 0 && mReadyInClass[policy_class] > 0)
+ HttpRetryQueue & retryq(mState[policy_class].mRetryQueue);
+ HttpReadyQueue & readyq(mState[policy_class].mReadyQueue);
+
+ if (needed > 0)
{
- // Scan ready queue for requests that match policy
-
- while (! readyq.empty() && needed > 0 && mReadyInClass[policy_class] > 0)
+ // First see if we have any retries...
+ while (needed > 0 && ! retryq.empty())
+ {
+ HttpOpRequest * op(retryq.top());
+ if (op->mPolicyRetryAt > now)
+ break;
+
+ retryq.pop();
+
+ op->stageFromReady(mService);
+ op->release();
+
+ --needed;
+ }
+
+ // Now go on to the new requests...
+ while (needed > 0 && ! readyq.empty())
{
HttpOpRequest * op(readyq.top());
readyq.pop();
@@ -96,17 +145,16 @@ HttpService::ELoopSpeed HttpPolicy::processReadyQueue()
op->stageFromReady(mService);
op->release();
- --mReadyInClass[policy_class];
--needed;
}
}
-
- if (! readyq.empty())
+
+ if (! readyq.empty() || ! retryq.empty())
{
// If anything is ready, continue looping...
result = (std::min)(result, HttpService::NORMAL);
}
- }
+ } // end foreach policy_class
return result;
}
@@ -114,9 +162,9 @@ HttpService::ELoopSpeed HttpPolicy::processReadyQueue()
bool HttpPolicy::changePriority(HttpHandle handle, HttpRequest::priority_t priority)
{
- for (int policy_class(0); policy_class < HttpRequest::POLICY_CLASS_LIMIT; ++policy_class)
+ for (int policy_class(0); policy_class < LL_ARRAY_SIZE(mState); ++policy_class)
{
- HttpReadyQueue::container_type & c(mReadyQueue[policy_class].get_container());
+ HttpReadyQueue::container_type & c(mState[policy_class].mReadyQueue.get_container());
// Scan ready queue for requests that match policy
for (HttpReadyQueue::container_type::iterator iter(c.begin()); c.end() != iter;)
@@ -126,9 +174,9 @@ bool HttpPolicy::changePriority(HttpHandle handle, HttpRequest::priority_t prior
if (static_cast<HttpHandle>(*cur) == handle)
{
HttpOpRequest * op(*cur);
- c.erase(cur); // All iterators are now invalidated
+ c.erase(cur); // All iterators are now invalidated
op->mReqPriority = priority;
- mReadyQueue[policy_class].push(op); // Re-insert using adapter class
+ mState[policy_class].mReadyQueue.push(op); // Re-insert using adapter class
return true;
}
}
diff --git a/indra/llcorehttp/_httppolicy.h b/indra/llcorehttp/_httppolicy.h
index 425079ec63..6f18264f3d 100644
--- a/indra/llcorehttp/_httppolicy.h
+++ b/indra/llcorehttp/_httppolicy.h
@@ -31,6 +31,7 @@
#include "httprequest.h"
#include "_httpservice.h"
#include "_httpreadyqueue.h"
+#include "_httpretryqueue.h"
#include "_httppolicyglobal.h"
@@ -67,6 +68,14 @@ public:
/// additional references will be added.)
void addOp(HttpOpRequest *);
+ /// Similar to addOp, used when a caller wants to retry a
+ /// request that has failed. It's placed on a special retry
+ /// queue but ordered by retry time not priority. Otherwise,
+ /// handling is the same and retried operations are considered
+ /// before new ones but that doesn't guarantee completion
+ /// order.
+ void retryOp(HttpOpRequest *);
+
// Shadows HttpService's method
bool changePriority(HttpHandle handle, HttpRequest::priority_t priority);
@@ -77,10 +86,14 @@ public:
return mGlobalOptions;
}
-
protected:
- int mReadyInClass[HttpRequest::POLICY_CLASS_LIMIT];
- HttpReadyQueue mReadyQueue[HttpRequest::POLICY_CLASS_LIMIT];
+ struct State
+ {
+ HttpReadyQueue mReadyQueue;
+ HttpRetryQueue mRetryQueue;
+ };
+
+ State mState[HttpRequest::POLICY_CLASS_LIMIT];
HttpService * mService; // Naked pointer, not refcounted, not owner
HttpPolicyGlobal mGlobalOptions;
diff --git a/indra/llcorehttp/_httpreadyqueue.h b/indra/llcorehttp/_httpreadyqueue.h
index 2cd96aefe3..87828834dc 100644
--- a/indra/llcorehttp/_httpreadyqueue.h
+++ b/indra/llcorehttp/_httpreadyqueue.h
@@ -36,8 +36,6 @@
namespace LLCore
{
-class HttpOpRequest;
-
/// HttpReadyQueue provides a simple priority queue for HttpOpRequest objects.
///
/// This uses the priority_queue adaptor class to provide the queue
diff --git a/indra/llcorehttp/_httpretryqueue.h b/indra/llcorehttp/_httpretryqueue.h
new file mode 100644
index 0000000000..745adec09d
--- /dev/null
+++ b/indra/llcorehttp/_httpretryqueue.h
@@ -0,0 +1,94 @@
+/**
+ * @file _httpretryqueue.h
+ * @brief Internal declaration for the operation retry queue
+ *
+ * $LicenseInfo:firstyear=2012&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2012, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+ * $/LicenseInfo$
+ */
+
+#ifndef _LLCORE_HTTP_RETRY_QUEUE_H_
+#define _LLCORE_HTTP_RETRY_QUEUE_H_
+
+
+#include <queue>
+
+#include "_httpoprequest.h"
+
+
+namespace LLCore
+{
+
+/// HttpRetryQueue provides a simple priority queue for HttpOpRequest objects.
+///
+/// This uses the priority_queue adaptor class to provide the queue
+/// as well as the ordering scheme while allowing us access to the
+/// raw container if we follow a few simple rules. One of the more
+/// important of those rules is that any iterator becomes invalid
+/// on element erasure. So pay attention.
+///
+/// Threading: not thread-safe. Expected to be used entirely by
+/// a single thread, typically a worker thread of some sort.
+
+struct HttpOpRetryCompare
+{
+ bool operator()(const HttpOpRequest * lhs, const HttpOpRequest * rhs)
+ {
+ return lhs->mPolicyRetryAt < rhs->mPolicyRetryAt;
+ }
+};
+
+
+typedef std::priority_queue<HttpOpRequest *,
+ std::deque<HttpOpRequest *>,
+ LLCore::HttpOpRetryCompare> HttpRetryQueueBase;
+
+class HttpRetryQueue : public HttpRetryQueueBase
+{
+public:
+ HttpRetryQueue()
+ : HttpRetryQueueBase()
+ {}
+
+ ~HttpRetryQueue()
+ {}
+
+protected:
+ HttpRetryQueue(const HttpRetryQueue &); // Not defined
+ void operator=(const HttpRetryQueue &); // Not defined
+
+public:
+ const container_type & get_container() const
+ {
+ return c;
+ }
+
+ container_type & get_container()
+ {
+ return c;
+ }
+
+}; // end class HttpRetryQueue
+
+
+} // end namespace LLCore
+
+
+#endif // _LLCORE_HTTP_RETRY_QUEUE_H_
diff --git a/indra/llcorehttp/httpcommon.h b/indra/llcorehttp/httpcommon.h
index fd2661b700..42b75edb41 100644
--- a/indra/llcorehttp/httpcommon.h
+++ b/indra/llcorehttp/httpcommon.h
@@ -114,6 +114,9 @@ namespace LLCore
typedef void * HttpHandle;
#define LLCORE_HTTP_HANDLE_INVALID (NULL)
+/// For internal scheduling and metrics, we use a microsecond
+/// timebase compatible with the environment.
+typedef U64 HttpTime;
/// Error codes defined by the library itself as distinct from
/// libcurl (or any other transport provider).
@@ -180,6 +183,15 @@ struct HttpStatus
mStatus(status)
{}
+ HttpStatus(int http_status)
+ : mType(http_status),
+ mStatus(http_status >= 200 && http_status <= 299
+ ? HE_SUCCESS
+ : HE_REPLY_ERROR)
+ {
+ llassert(http_status >= 100 && http_status <= 999);
+ }
+
HttpStatus(const HttpStatus & rhs)
: mType(rhs.mType),
mStatus(rhs.mStatus)
diff --git a/indra/llcorehttp/tests/test_httprequest.hpp b/indra/llcorehttp/tests/test_httprequest.hpp
index 0e9d7d8979..2d91b95347 100644
--- a/indra/llcorehttp/tests/test_httprequest.hpp
+++ b/indra/llcorehttp/tests/test_httprequest.hpp
@@ -381,7 +381,7 @@ void HttpRequestTestObjectType::test<5>()
// Run the notification pump.
int count(0);
- int limit(20);
+ int limit(180); // With retries, can take more than 10 seconds to give up
while (count++ < limit && mHandlerCalls < 1)
{
req->update(1000);
diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp
index 92b847345d..4a46ea0e97 100644
--- a/indra/newview/lltexturefetch.cpp
+++ b/indra/newview/lltexturefetch.cpp
@@ -67,9 +67,53 @@
// This is an attempt to document what's going on in here after-the-fact.
// It's a sincere attempt to be accurate but there will be mistakes.
//
+//
// Purpose
//
-// (What is this solving?)
+// What is this module trying to do? It accepts requests to load textures
+// at a given priority and discard level and notifies the caller when done
+// (successfully or not). Additional constraints are:
+//
+// * Support a local texture cache. Don't hit network when possible
+// to avoid it.
+// * Use UDP or HTTP as directed or as fallback. HTTP is tried when
+// not disabled and a URL is available. UDP when a URL isn't
+// available or HTTP attempts fail.
+// * Asynchronous (using threads). Main thread is not to be blocked or
+// burdened.
+// * High concurrency. Many requests need to be in-flight and at various
+// stages of completion.
+// * Tolerate frequent re-prioritizations of requests. Priority is
+// a reflection of a camera's viewpoint and as that viewpoint changes,
+// objects and textures become more and less relevant and that is
+// expressed at this level by priority changes and request cancelations.
+//
+// The caller interfaces that fall out of the above and shape the
+// implementation are:
+// * createRequest - Load j2c image via UDP or HTTP at given discard level and priority
+// * deleteRequest - Request removal of prior request
+// * getRequestFinished - Test if request is finished returning data to caller
+// * updateRequestPriority - Change priority of existing request
+// * getFetchState - Retrieve progress on existing request
+//
+// Everything else in here is mostly plumbing, metrics and debug.
+//
+//
+// The Work Queue
+//
+// The two central classes are LLTextureFetch and LLTextureFetchWorker.
+// LLTextureFetch combines threading with a priority queue of work
+// requests. The priority queue is sorted by a U32 priority derived
+// from the F32 priority in the APIs. The *only* work request that
+// receives service time by this thread is the highest priority
+// request. All others wait until it is complete or a dynamic priority
+// change has re-ordered work.
+//
+// LLTextureFetchWorker implements the work request and is 1:1 with
+// texture fetch requests. Embedded in each is a state machine that
+// walks it through the cache, HTTP, UDP, image decode and retry
+// steps of texture acquisition.
+//
//
// Threads
//
@@ -83,6 +127,7 @@
// 5. Tid Image decoder's worker thread
// 6. Thl HTTP library's worker thread
//
+//
// Mutexes/Condition Variables
//
// 1. Mt Mutex defined for LLThread's condition variable (base class of
@@ -98,6 +143,7 @@
// LLTextureFetchWorker). One per request.
// 7. Mw LLTextureFetchWorker's mutex. One per request.
//
+//
// Lock Ordering Rules
//
// Not an exhaustive list but shows the order of lock acquisition
@@ -105,6 +151,8 @@
// acquiring 'B'.
//
// 1. Mw < Mfnq
+// (there are many more...)
+//
//
// Method and Member Definitions
//
@@ -124,7 +172,10 @@
// comment can mean the member is unlocked or that I didn't bother
// to do the archaeology. In the case of LLTextureFetchWorker,
// most data members added by the leaf class are actually covered
-// by the Mw lock.
+// by the Mw lock. You may also see "// T<xxx>" which means that
+// the member's usage is restricted to one thread (except for
+// perhaps construction and destruction) and so explicit locking
+// isn't used.
//
// In code, a trailing comment like "// [-+]M<xxx>" indicates a
// lock acquision or release point.
@@ -132,27 +183,54 @@
//
// Worker Lifecycle
//
-// (Can't unilaterally delete, cleanup is two-phase, etc.)
+// The threading and responder model makes it very likely that
+// other components are holding on to a pointer to a worker request.
+// So, uncoordinated deletions of requests is a guarantee of memory
+// corruption in a short time. So destroying a request involves
+// invocations's of LLQueuedThread/LLWorkerThread's abort/stop
+// logic that removes workers and puts them ona delete queue for
+// 2-phase destruction. That second phase is deferrable by calls
+// to deleteOK() which only allow final destruction (via dtor)
+// once deleteOK has determined that the request is in a safe
+// state.
+//
//
// Worker State Machine
//
// (ASCII art needed)
//
+//
// Priority Scheme
//
// [PRIORITY_LOW, PRIORITY_NORMAL) - for WAIT_HTTP_RESOURCE state
// [PRIORITY_NORMAL, PRIORITY_HIGH) - waiting for external event
-// [PRIORITY_HIGH, PRIORITY_URGENT) - rapidly transitioning through states,
+// [PRIORITY_HIGH, PRIORITY_URGENT) - External event delivered,
+// rapidly transitioning through states,
// no waiting allowed
//
+// By itself, the above work queue model would fail the concurrency
+// and liveness requirements of the interface. A high priority
+// request could find itself on the head and stalled for external
+// reasons (see VWR-28996). So a few additional constraints are
+// required to keep things running:
+// * Anything that can make forward progress must be kept at a
+// higher priority than anything that can't.
+// * On completion of external events, the associated request
+// needs to be elevated beyond the normal range to handle
+// any data delivery and release any external resource.
+//
+// This effort is made to keep higher-priority entities moving
+// forward in their state machines at every possible step of
+// processing. It's not entirely proven that this produces the
+// experiencial benefits promised.
//
//////////////////////////////////////////////////////////////////////////////
// Tuning/Parameterization Constants
-static const S32 HTTP_REQUESTS_IN_QUEUE_HIGH_WATER = 40;
-static const S32 HTTP_REQUESTS_IN_QUEUE_LOW_WATER = 20;
+static const S32 HTTP_REQUESTS_IN_QUEUE_HIGH_WATER = 40; // Maximum requests to have active in HTTP
+static const S32 HTTP_REQUESTS_IN_QUEUE_LOW_WATER = 20; // Active level at which to refill
//////////////////////////////////////////////////////////////////////////////
@@ -425,7 +503,6 @@ private:
BOOL mInLocalCache;
bool mCanUseHTTP ;
bool mCanUseNET ; //can get from asset server.
- S32 mHTTPFailCount;
S32 mRetryAttempt;
S32 mActiveCount;
LLCore::HttpStatus mGetStatus;
@@ -745,7 +822,6 @@ LLTextureFetchWorker::LLTextureFetchWorker(LLTextureFetch* fetcher,
mHaveAllData(FALSE),
mInLocalCache(FALSE),
mCanUseHTTP(true),
- mHTTPFailCount(0),
mRetryAttempt(0),
mActiveCount(0),
mWorkMutex(NULL),
@@ -936,6 +1012,9 @@ void LLTextureFetchWorker::startWork(S32 param)
// Threads: Ttf
bool LLTextureFetchWorker::doWork(S32 param)
{
+ static const LLCore::HttpStatus http_not_found(HTTP_NOT_FOUND);
+ static const LLCore::HttpStatus http_service_unavail(HTTP_SERVICE_UNAVAILABLE);
+
// Release waiters while we aren't holding the Mw lock.
mFetcher->releaseHttpWaiters();
@@ -1286,7 +1365,6 @@ bool LLTextureFetchWorker::doWork(S32 param)
{
llwarns << "HTTP GET request failed for " << mID << llendl;
resetFormattedData();
- ++mHTTPFailCount;
return true; // failed
}
@@ -1313,10 +1391,8 @@ bool LLTextureFetchWorker::doWork(S32 param)
S32 cur_size = mFormattedImage.notNull() ? mFormattedImage->getDataSize() : 0;
if (mRequestedSize < 0)
{
- S32 max_attempts;
- if (mGetStatus == LLCore::HttpStatus(HTTP_NOT_FOUND, LLCore::HE_REPLY_ERROR))
+ if (http_not_found == mGetStatus)
{
- mHTTPFailCount = max_attempts = 1; // Don't retry
llwarns << "Texture missing from server (404): " << mUrl << llendl;
// roll back to try UDP
@@ -1328,47 +1404,32 @@ bool LLTextureFetchWorker::doWork(S32 param)
return false;
}
}
- else if (mGetStatus == LLCore::HttpStatus(HTTP_SERVICE_UNAVAILABLE, LLCore::HE_REPLY_ERROR))
+ else if (http_service_unavail == mGetStatus)
{
- // *TODO: Should probably introduce a timer here to delay future HTTP requsts
- // for a short time (~1s) to ease server load? Ideally the server would queue
- // requests instead of returning 503... we already limit the number pending.
- ++mHTTPFailCount;
- max_attempts = mHTTPFailCount+1; // Keep retrying
LL_INFOS_ONCE("Texture") << "Texture server busy (503): " << mUrl << LL_ENDL;
}
else
{
- const S32 HTTP_MAX_RETRY_COUNT = 3;
- max_attempts = HTTP_MAX_RETRY_COUNT + 1;
- ++mHTTPFailCount;
llinfos << "HTTP GET failed for: " << mUrl
<< " Status: " << mGetStatus.toHex()
<< " Reason: '" << mGetReason << "'"
- << " Attempt:" << mHTTPFailCount+1 << "/" << max_attempts << llendl;
+ // *FIXME: Add retry info for reporting purposes...
+ // << " Attempt:" << mHTTPFailCount+1 << "/" << max_attempts
+ << llendl;
}
- if (mHTTPFailCount >= max_attempts)
- {
- if (cur_size > 0)
- {
- // Use available data
- mLoadedDiscard = mFormattedImage->getDiscardLevel();
- mState = DECODE_IMAGE;
- return false;
- }
- else
- {
- resetFormattedData();
- mState = DONE;
- return true; // failed
- }
- }
- else
+ if (cur_size > 0)
{
- mState = SEND_HTTP_REQ;
- return false; // retry
+ // Use available data
+ mLoadedDiscard = mFormattedImage->getDiscardLevel();
+ mState = DECODE_IMAGE;
+ return false;
}
+
+ // Fail harder
+ resetFormattedData();
+ mState = DONE;
+ return true; // failed
}
if (! mHttpBufferArray || ! mHttpBufferArray->size())
@@ -1649,7 +1710,7 @@ void LLTextureFetchWorker::onCompleted(LLCore::HttpHandle handle, LLCore::HttpRe
}
else
{
- static const LLCore::HttpStatus par_status(LLCore::HttpStatus(HTTP_PARTIAL_CONTENT, LLCore::HE_SUCCESS));
+ static const LLCore::HttpStatus par_status(HTTP_PARTIAL_CONTENT);
partial = (par_status == status);
}
diff --git a/indra/newview/lltexturefetch.h b/indra/newview/lltexturefetch.h
index 53b0f7885f..4ee13d171e 100644
--- a/indra/newview/lltexturefetch.h
+++ b/indra/newview/lltexturefetch.h
@@ -308,9 +308,9 @@ private:
// Interfaces and objects into the core http library used
// to make our HTTP requests. These replace the various
// LLCurl interfaces used in the past.
- LLCore::HttpRequest * mHttpRequest;
- LLCore::HttpOptions * mHttpOptions;
- LLCore::HttpHeaders * mHttpHeaders;
+ LLCore::HttpRequest * mHttpRequest; // Ttf
+ LLCore::HttpOptions * mHttpOptions; // Ttf
+ LLCore::HttpHeaders * mHttpHeaders; // Ttf
typedef std::set<LLUUID> wait_http_res_queue_t;
wait_http_res_queue_t mHttpWaitResource; // Mfnq