diff options
Diffstat (limited to 'indra/newview/lltexturefetch.cpp')
-rw-r--r-- | indra/newview/lltexturefetch.cpp | 145 |
1 files changed, 103 insertions, 42 deletions
diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index 92b847345d..4a46ea0e97 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -67,9 +67,53 @@ // This is an attempt to document what's going on in here after-the-fact. // It's a sincere attempt to be accurate but there will be mistakes. // +// // Purpose // -// (What is this solving?) +// What is this module trying to do? It accepts requests to load textures +// at a given priority and discard level and notifies the caller when done +// (successfully or not). Additional constraints are: +// +// * Support a local texture cache. Don't hit network when possible +// to avoid it. +// * Use UDP or HTTP as directed or as fallback. HTTP is tried when +// not disabled and a URL is available. UDP when a URL isn't +// available or HTTP attempts fail. +// * Asynchronous (using threads). Main thread is not to be blocked or +// burdened. +// * High concurrency. Many requests need to be in-flight and at various +// stages of completion. +// * Tolerate frequent re-prioritizations of requests. Priority is +// a reflection of a camera's viewpoint and as that viewpoint changes, +// objects and textures become more and less relevant and that is +// expressed at this level by priority changes and request cancelations. +// +// The caller interfaces that fall out of the above and shape the +// implementation are: +// * createRequest - Load j2c image via UDP or HTTP at given discard level and priority +// * deleteRequest - Request removal of prior request +// * getRequestFinished - Test if request is finished returning data to caller +// * updateRequestPriority - Change priority of existing request +// * getFetchState - Retrieve progress on existing request +// +// Everything else in here is mostly plumbing, metrics and debug. +// +// +// The Work Queue +// +// The two central classes are LLTextureFetch and LLTextureFetchWorker. +// LLTextureFetch combines threading with a priority queue of work +// requests. The priority queue is sorted by a U32 priority derived +// from the F32 priority in the APIs. The *only* work request that +// receives service time by this thread is the highest priority +// request. All others wait until it is complete or a dynamic priority +// change has re-ordered work. +// +// LLTextureFetchWorker implements the work request and is 1:1 with +// texture fetch requests. Embedded in each is a state machine that +// walks it through the cache, HTTP, UDP, image decode and retry +// steps of texture acquisition. +// // // Threads // @@ -83,6 +127,7 @@ // 5. Tid Image decoder's worker thread // 6. Thl HTTP library's worker thread // +// // Mutexes/Condition Variables // // 1. Mt Mutex defined for LLThread's condition variable (base class of @@ -98,6 +143,7 @@ // LLTextureFetchWorker). One per request. // 7. Mw LLTextureFetchWorker's mutex. One per request. // +// // Lock Ordering Rules // // Not an exhaustive list but shows the order of lock acquisition @@ -105,6 +151,8 @@ // acquiring 'B'. // // 1. Mw < Mfnq +// (there are many more...) +// // // Method and Member Definitions // @@ -124,7 +172,10 @@ // comment can mean the member is unlocked or that I didn't bother // to do the archaeology. In the case of LLTextureFetchWorker, // most data members added by the leaf class are actually covered -// by the Mw lock. +// by the Mw lock. You may also see "// T<xxx>" which means that +// the member's usage is restricted to one thread (except for +// perhaps construction and destruction) and so explicit locking +// isn't used. // // In code, a trailing comment like "// [-+]M<xxx>" indicates a // lock acquision or release point. @@ -132,27 +183,54 @@ // // Worker Lifecycle // -// (Can't unilaterally delete, cleanup is two-phase, etc.) +// The threading and responder model makes it very likely that +// other components are holding on to a pointer to a worker request. +// So, uncoordinated deletions of requests is a guarantee of memory +// corruption in a short time. So destroying a request involves +// invocations's of LLQueuedThread/LLWorkerThread's abort/stop +// logic that removes workers and puts them ona delete queue for +// 2-phase destruction. That second phase is deferrable by calls +// to deleteOK() which only allow final destruction (via dtor) +// once deleteOK has determined that the request is in a safe +// state. +// // // Worker State Machine // // (ASCII art needed) // +// // Priority Scheme // // [PRIORITY_LOW, PRIORITY_NORMAL) - for WAIT_HTTP_RESOURCE state // [PRIORITY_NORMAL, PRIORITY_HIGH) - waiting for external event -// [PRIORITY_HIGH, PRIORITY_URGENT) - rapidly transitioning through states, +// [PRIORITY_HIGH, PRIORITY_URGENT) - External event delivered, +// rapidly transitioning through states, // no waiting allowed // +// By itself, the above work queue model would fail the concurrency +// and liveness requirements of the interface. A high priority +// request could find itself on the head and stalled for external +// reasons (see VWR-28996). So a few additional constraints are +// required to keep things running: +// * Anything that can make forward progress must be kept at a +// higher priority than anything that can't. +// * On completion of external events, the associated request +// needs to be elevated beyond the normal range to handle +// any data delivery and release any external resource. +// +// This effort is made to keep higher-priority entities moving +// forward in their state machines at every possible step of +// processing. It's not entirely proven that this produces the +// experiencial benefits promised. // ////////////////////////////////////////////////////////////////////////////// // Tuning/Parameterization Constants -static const S32 HTTP_REQUESTS_IN_QUEUE_HIGH_WATER = 40; -static const S32 HTTP_REQUESTS_IN_QUEUE_LOW_WATER = 20; +static const S32 HTTP_REQUESTS_IN_QUEUE_HIGH_WATER = 40; // Maximum requests to have active in HTTP +static const S32 HTTP_REQUESTS_IN_QUEUE_LOW_WATER = 20; // Active level at which to refill ////////////////////////////////////////////////////////////////////////////// @@ -425,7 +503,6 @@ private: BOOL mInLocalCache; bool mCanUseHTTP ; bool mCanUseNET ; //can get from asset server. - S32 mHTTPFailCount; S32 mRetryAttempt; S32 mActiveCount; LLCore::HttpStatus mGetStatus; @@ -745,7 +822,6 @@ LLTextureFetchWorker::LLTextureFetchWorker(LLTextureFetch* fetcher, mHaveAllData(FALSE), mInLocalCache(FALSE), mCanUseHTTP(true), - mHTTPFailCount(0), mRetryAttempt(0), mActiveCount(0), mWorkMutex(NULL), @@ -936,6 +1012,9 @@ void LLTextureFetchWorker::startWork(S32 param) // Threads: Ttf bool LLTextureFetchWorker::doWork(S32 param) { + static const LLCore::HttpStatus http_not_found(HTTP_NOT_FOUND); + static const LLCore::HttpStatus http_service_unavail(HTTP_SERVICE_UNAVAILABLE); + // Release waiters while we aren't holding the Mw lock. mFetcher->releaseHttpWaiters(); @@ -1286,7 +1365,6 @@ bool LLTextureFetchWorker::doWork(S32 param) { llwarns << "HTTP GET request failed for " << mID << llendl; resetFormattedData(); - ++mHTTPFailCount; return true; // failed } @@ -1313,10 +1391,8 @@ bool LLTextureFetchWorker::doWork(S32 param) S32 cur_size = mFormattedImage.notNull() ? mFormattedImage->getDataSize() : 0; if (mRequestedSize < 0) { - S32 max_attempts; - if (mGetStatus == LLCore::HttpStatus(HTTP_NOT_FOUND, LLCore::HE_REPLY_ERROR)) + if (http_not_found == mGetStatus) { - mHTTPFailCount = max_attempts = 1; // Don't retry llwarns << "Texture missing from server (404): " << mUrl << llendl; // roll back to try UDP @@ -1328,47 +1404,32 @@ bool LLTextureFetchWorker::doWork(S32 param) return false; } } - else if (mGetStatus == LLCore::HttpStatus(HTTP_SERVICE_UNAVAILABLE, LLCore::HE_REPLY_ERROR)) + else if (http_service_unavail == mGetStatus) { - // *TODO: Should probably introduce a timer here to delay future HTTP requsts - // for a short time (~1s) to ease server load? Ideally the server would queue - // requests instead of returning 503... we already limit the number pending. - ++mHTTPFailCount; - max_attempts = mHTTPFailCount+1; // Keep retrying LL_INFOS_ONCE("Texture") << "Texture server busy (503): " << mUrl << LL_ENDL; } else { - const S32 HTTP_MAX_RETRY_COUNT = 3; - max_attempts = HTTP_MAX_RETRY_COUNT + 1; - ++mHTTPFailCount; llinfos << "HTTP GET failed for: " << mUrl << " Status: " << mGetStatus.toHex() << " Reason: '" << mGetReason << "'" - << " Attempt:" << mHTTPFailCount+1 << "/" << max_attempts << llendl; + // *FIXME: Add retry info for reporting purposes... + // << " Attempt:" << mHTTPFailCount+1 << "/" << max_attempts + << llendl; } - if (mHTTPFailCount >= max_attempts) - { - if (cur_size > 0) - { - // Use available data - mLoadedDiscard = mFormattedImage->getDiscardLevel(); - mState = DECODE_IMAGE; - return false; - } - else - { - resetFormattedData(); - mState = DONE; - return true; // failed - } - } - else + if (cur_size > 0) { - mState = SEND_HTTP_REQ; - return false; // retry + // Use available data + mLoadedDiscard = mFormattedImage->getDiscardLevel(); + mState = DECODE_IMAGE; + return false; } + + // Fail harder + resetFormattedData(); + mState = DONE; + return true; // failed } if (! mHttpBufferArray || ! mHttpBufferArray->size()) @@ -1649,7 +1710,7 @@ void LLTextureFetchWorker::onCompleted(LLCore::HttpHandle handle, LLCore::HttpRe } else { - static const LLCore::HttpStatus par_status(LLCore::HttpStatus(HTTP_PARTIAL_CONTENT, LLCore::HE_SUCCESS)); + static const LLCore::HttpStatus par_status(HTTP_PARTIAL_CONTENT); partial = (par_status == status); } |