summaryrefslogtreecommitdiff
path: root/indra/newview/lltexturefetch.cpp
diff options
context:
space:
mode:
authorMonty Brandenberg <monty@lindenlab.com>2012-06-08 20:21:54 -0400
committerMonty Brandenberg <monty@lindenlab.com>2012-06-08 20:21:54 -0400
commit28a04400b4160dd34166483ddcf0c12637bcc363 (patch)
treee43ca202df6ed9bc8cbcc4a97d354e4798b7aa62 /indra/newview/lltexturefetch.cpp
parent1e3d05329f2e823191c7c91926bee5ec9e5dc4d7 (diff)
Implemented HTTP retry for requests. Went in rather easily which
surprised me. Added a retry queue similar to ready queue to the policy object which is sorted by retry time. Currently do five retries (after the initial try) delayed by .25, .5, 1, 2 and 5 seconds. Removed the retry logic from the lltexturefetch module. Upped the waiting time in the unit test for the retries. People won't like this but tough, need tests.
Diffstat (limited to 'indra/newview/lltexturefetch.cpp')
-rw-r--r--indra/newview/lltexturefetch.cpp145
1 files changed, 103 insertions, 42 deletions
diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp
index 92b847345d..4a46ea0e97 100644
--- a/indra/newview/lltexturefetch.cpp
+++ b/indra/newview/lltexturefetch.cpp
@@ -67,9 +67,53 @@
// This is an attempt to document what's going on in here after-the-fact.
// It's a sincere attempt to be accurate but there will be mistakes.
//
+//
// Purpose
//
-// (What is this solving?)
+// What is this module trying to do? It accepts requests to load textures
+// at a given priority and discard level and notifies the caller when done
+// (successfully or not). Additional constraints are:
+//
+// * Support a local texture cache. Don't hit network when possible
+// to avoid it.
+// * Use UDP or HTTP as directed or as fallback. HTTP is tried when
+// not disabled and a URL is available. UDP when a URL isn't
+// available or HTTP attempts fail.
+// * Asynchronous (using threads). Main thread is not to be blocked or
+// burdened.
+// * High concurrency. Many requests need to be in-flight and at various
+// stages of completion.
+// * Tolerate frequent re-prioritizations of requests. Priority is
+// a reflection of a camera's viewpoint and as that viewpoint changes,
+// objects and textures become more and less relevant and that is
+// expressed at this level by priority changes and request cancelations.
+//
+// The caller interfaces that fall out of the above and shape the
+// implementation are:
+// * createRequest - Load j2c image via UDP or HTTP at given discard level and priority
+// * deleteRequest - Request removal of prior request
+// * getRequestFinished - Test if request is finished returning data to caller
+// * updateRequestPriority - Change priority of existing request
+// * getFetchState - Retrieve progress on existing request
+//
+// Everything else in here is mostly plumbing, metrics and debug.
+//
+//
+// The Work Queue
+//
+// The two central classes are LLTextureFetch and LLTextureFetchWorker.
+// LLTextureFetch combines threading with a priority queue of work
+// requests. The priority queue is sorted by a U32 priority derived
+// from the F32 priority in the APIs. The *only* work request that
+// receives service time by this thread is the highest priority
+// request. All others wait until it is complete or a dynamic priority
+// change has re-ordered work.
+//
+// LLTextureFetchWorker implements the work request and is 1:1 with
+// texture fetch requests. Embedded in each is a state machine that
+// walks it through the cache, HTTP, UDP, image decode and retry
+// steps of texture acquisition.
+//
//
// Threads
//
@@ -83,6 +127,7 @@
// 5. Tid Image decoder's worker thread
// 6. Thl HTTP library's worker thread
//
+//
// Mutexes/Condition Variables
//
// 1. Mt Mutex defined for LLThread's condition variable (base class of
@@ -98,6 +143,7 @@
// LLTextureFetchWorker). One per request.
// 7. Mw LLTextureFetchWorker's mutex. One per request.
//
+//
// Lock Ordering Rules
//
// Not an exhaustive list but shows the order of lock acquisition
@@ -105,6 +151,8 @@
// acquiring 'B'.
//
// 1. Mw < Mfnq
+// (there are many more...)
+//
//
// Method and Member Definitions
//
@@ -124,7 +172,10 @@
// comment can mean the member is unlocked or that I didn't bother
// to do the archaeology. In the case of LLTextureFetchWorker,
// most data members added by the leaf class are actually covered
-// by the Mw lock.
+// by the Mw lock. You may also see "// T<xxx>" which means that
+// the member's usage is restricted to one thread (except for
+// perhaps construction and destruction) and so explicit locking
+// isn't used.
//
// In code, a trailing comment like "// [-+]M<xxx>" indicates a
// lock acquision or release point.
@@ -132,27 +183,54 @@
//
// Worker Lifecycle
//
-// (Can't unilaterally delete, cleanup is two-phase, etc.)
+// The threading and responder model makes it very likely that
+// other components are holding on to a pointer to a worker request.
+// So, uncoordinated deletions of requests is a guarantee of memory
+// corruption in a short time. So destroying a request involves
+// invocations's of LLQueuedThread/LLWorkerThread's abort/stop
+// logic that removes workers and puts them ona delete queue for
+// 2-phase destruction. That second phase is deferrable by calls
+// to deleteOK() which only allow final destruction (via dtor)
+// once deleteOK has determined that the request is in a safe
+// state.
+//
//
// Worker State Machine
//
// (ASCII art needed)
//
+//
// Priority Scheme
//
// [PRIORITY_LOW, PRIORITY_NORMAL) - for WAIT_HTTP_RESOURCE state
// [PRIORITY_NORMAL, PRIORITY_HIGH) - waiting for external event
-// [PRIORITY_HIGH, PRIORITY_URGENT) - rapidly transitioning through states,
+// [PRIORITY_HIGH, PRIORITY_URGENT) - External event delivered,
+// rapidly transitioning through states,
// no waiting allowed
//
+// By itself, the above work queue model would fail the concurrency
+// and liveness requirements of the interface. A high priority
+// request could find itself on the head and stalled for external
+// reasons (see VWR-28996). So a few additional constraints are
+// required to keep things running:
+// * Anything that can make forward progress must be kept at a
+// higher priority than anything that can't.
+// * On completion of external events, the associated request
+// needs to be elevated beyond the normal range to handle
+// any data delivery and release any external resource.
+//
+// This effort is made to keep higher-priority entities moving
+// forward in their state machines at every possible step of
+// processing. It's not entirely proven that this produces the
+// experiencial benefits promised.
//
//////////////////////////////////////////////////////////////////////////////
// Tuning/Parameterization Constants
-static const S32 HTTP_REQUESTS_IN_QUEUE_HIGH_WATER = 40;
-static const S32 HTTP_REQUESTS_IN_QUEUE_LOW_WATER = 20;
+static const S32 HTTP_REQUESTS_IN_QUEUE_HIGH_WATER = 40; // Maximum requests to have active in HTTP
+static const S32 HTTP_REQUESTS_IN_QUEUE_LOW_WATER = 20; // Active level at which to refill
//////////////////////////////////////////////////////////////////////////////
@@ -425,7 +503,6 @@ private:
BOOL mInLocalCache;
bool mCanUseHTTP ;
bool mCanUseNET ; //can get from asset server.
- S32 mHTTPFailCount;
S32 mRetryAttempt;
S32 mActiveCount;
LLCore::HttpStatus mGetStatus;
@@ -745,7 +822,6 @@ LLTextureFetchWorker::LLTextureFetchWorker(LLTextureFetch* fetcher,
mHaveAllData(FALSE),
mInLocalCache(FALSE),
mCanUseHTTP(true),
- mHTTPFailCount(0),
mRetryAttempt(0),
mActiveCount(0),
mWorkMutex(NULL),
@@ -936,6 +1012,9 @@ void LLTextureFetchWorker::startWork(S32 param)
// Threads: Ttf
bool LLTextureFetchWorker::doWork(S32 param)
{
+ static const LLCore::HttpStatus http_not_found(HTTP_NOT_FOUND);
+ static const LLCore::HttpStatus http_service_unavail(HTTP_SERVICE_UNAVAILABLE);
+
// Release waiters while we aren't holding the Mw lock.
mFetcher->releaseHttpWaiters();
@@ -1286,7 +1365,6 @@ bool LLTextureFetchWorker::doWork(S32 param)
{
llwarns << "HTTP GET request failed for " << mID << llendl;
resetFormattedData();
- ++mHTTPFailCount;
return true; // failed
}
@@ -1313,10 +1391,8 @@ bool LLTextureFetchWorker::doWork(S32 param)
S32 cur_size = mFormattedImage.notNull() ? mFormattedImage->getDataSize() : 0;
if (mRequestedSize < 0)
{
- S32 max_attempts;
- if (mGetStatus == LLCore::HttpStatus(HTTP_NOT_FOUND, LLCore::HE_REPLY_ERROR))
+ if (http_not_found == mGetStatus)
{
- mHTTPFailCount = max_attempts = 1; // Don't retry
llwarns << "Texture missing from server (404): " << mUrl << llendl;
// roll back to try UDP
@@ -1328,47 +1404,32 @@ bool LLTextureFetchWorker::doWork(S32 param)
return false;
}
}
- else if (mGetStatus == LLCore::HttpStatus(HTTP_SERVICE_UNAVAILABLE, LLCore::HE_REPLY_ERROR))
+ else if (http_service_unavail == mGetStatus)
{
- // *TODO: Should probably introduce a timer here to delay future HTTP requsts
- // for a short time (~1s) to ease server load? Ideally the server would queue
- // requests instead of returning 503... we already limit the number pending.
- ++mHTTPFailCount;
- max_attempts = mHTTPFailCount+1; // Keep retrying
LL_INFOS_ONCE("Texture") << "Texture server busy (503): " << mUrl << LL_ENDL;
}
else
{
- const S32 HTTP_MAX_RETRY_COUNT = 3;
- max_attempts = HTTP_MAX_RETRY_COUNT + 1;
- ++mHTTPFailCount;
llinfos << "HTTP GET failed for: " << mUrl
<< " Status: " << mGetStatus.toHex()
<< " Reason: '" << mGetReason << "'"
- << " Attempt:" << mHTTPFailCount+1 << "/" << max_attempts << llendl;
+ // *FIXME: Add retry info for reporting purposes...
+ // << " Attempt:" << mHTTPFailCount+1 << "/" << max_attempts
+ << llendl;
}
- if (mHTTPFailCount >= max_attempts)
- {
- if (cur_size > 0)
- {
- // Use available data
- mLoadedDiscard = mFormattedImage->getDiscardLevel();
- mState = DECODE_IMAGE;
- return false;
- }
- else
- {
- resetFormattedData();
- mState = DONE;
- return true; // failed
- }
- }
- else
+ if (cur_size > 0)
{
- mState = SEND_HTTP_REQ;
- return false; // retry
+ // Use available data
+ mLoadedDiscard = mFormattedImage->getDiscardLevel();
+ mState = DECODE_IMAGE;
+ return false;
}
+
+ // Fail harder
+ resetFormattedData();
+ mState = DONE;
+ return true; // failed
}
if (! mHttpBufferArray || ! mHttpBufferArray->size())
@@ -1649,7 +1710,7 @@ void LLTextureFetchWorker::onCompleted(LLCore::HttpHandle handle, LLCore::HttpRe
}
else
{
- static const LLCore::HttpStatus par_status(LLCore::HttpStatus(HTTP_PARTIAL_CONTENT, LLCore::HE_SUCCESS));
+ static const LLCore::HttpStatus par_status(HTTP_PARTIAL_CONTENT);
partial = (par_status == status);
}