From 28a04400b4160dd34166483ddcf0c12637bcc363 Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Fri, 8 Jun 2012 20:21:54 -0400 Subject: Implemented HTTP retry for requests. Went in rather easily which surprised me. Added a retry queue similar to ready queue to the policy object which is sorted by retry time. Currently do five retries (after the initial try) delayed by .25, .5, 1, 2 and 5 seconds. Removed the retry logic from the lltexturefetch module. Upped the waiting time in the unit test for the retries. People won't like this but tough, need tests. --- indra/llcorehttp/_httplibcurl.cpp | 58 ++++++++++++++--- indra/llcorehttp/_httplibcurl.h | 4 +- indra/llcorehttp/_httpoprequest.cpp | 51 ++++++++++++--- indra/llcorehttp/_httpoprequest.h | 5 ++ indra/llcorehttp/_httppolicy.cpp | 98 +++++++++++++++++++++-------- indra/llcorehttp/_httppolicy.h | 19 +++++- indra/llcorehttp/_httpreadyqueue.h | 2 - indra/llcorehttp/_httpretryqueue.h | 94 +++++++++++++++++++++++++++ indra/llcorehttp/httpcommon.h | 12 ++++ indra/llcorehttp/tests/test_httprequest.hpp | 2 +- 10 files changed, 296 insertions(+), 49 deletions(-) create mode 100644 indra/llcorehttp/_httpretryqueue.h (limited to 'indra/llcorehttp') diff --git a/indra/llcorehttp/_httplibcurl.cpp b/indra/llcorehttp/_httplibcurl.cpp index 5272c391e8..05b2c2be69 100644 --- a/indra/llcorehttp/_httplibcurl.cpp +++ b/indra/llcorehttp/_httplibcurl.cpp @@ -29,6 +29,7 @@ #include "httpheaders.h" #include "bufferarray.h" #include "_httpoprequest.h" +#include "_httppolicy.h" namespace LLCore @@ -85,6 +86,8 @@ void HttpLibcurl::term() HttpService::ELoopSpeed HttpLibcurl::processTransport() { + HttpService::ELoopSpeed ret(HttpService::REQUEST_SLEEP); + // Give libcurl some cycles to do I/O & callbacks for (int policy_class(0); policy_class < HttpRequest::POLICY_CLASS_LIMIT; ++policy_class) { @@ -110,7 +113,8 @@ HttpService::ELoopSpeed HttpLibcurl::processTransport() CURL * handle(msg->easy_handle); CURLcode result(msg->data.result); - 
completeRequest(mMultiHandles[policy_class], handle, result); + HttpService::ELoopSpeed speed(completeRequest(mMultiHandles[policy_class], handle, result)); + ret = (std::min)(ret, speed); handle = NULL; // No longer valid on return } else if (CURLMSG_NONE == msg->msg) @@ -127,7 +131,11 @@ HttpService::ELoopSpeed HttpLibcurl::processTransport() } } - return mActiveOps.empty() ? HttpService::REQUEST_SLEEP : HttpService::NORMAL; + if (! mActiveOps.empty()) + { + ret = (std::min)(ret, HttpService::NORMAL); + } + return ret; } @@ -153,8 +161,12 @@ void HttpLibcurl::addOp(HttpOpRequest * op) } -void HttpLibcurl::completeRequest(CURLM * multi_handle, CURL * handle, CURLcode status) +HttpService::ELoopSpeed HttpLibcurl::completeRequest(CURLM * multi_handle, CURL * handle, CURLcode status) { + static const HttpStatus cant_connect(HttpStatus::EXT_CURL_EASY, CURLE_COULDNT_CONNECT); + static const HttpStatus cant_res_proxy(HttpStatus::EXT_CURL_EASY, CURLE_COULDNT_RESOLVE_PROXY); + static const HttpStatus cant_res_host(HttpStatus::EXT_CURL_EASY, CURLE_COULDNT_RESOLVE_HOST); + HttpOpRequest * op(NULL); curl_easy_getinfo(handle, CURLINFO_PRIVATE, &op); // *FIXME: check the pointer @@ -190,10 +202,7 @@ void HttpLibcurl::completeRequest(CURLM * multi_handle, CURL * handle, CURLcode int http_status(200); curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &http_status); - op->mStatus = LLCore::HttpStatus(http_status, - (http_status >= 200 && http_status <= 299 - ? HE_SUCCESS - : HE_REPLY_ERROR)); + op->mStatus = LLCore::HttpStatus(http_status); } // Detach from multi and recycle handle @@ -201,9 +210,42 @@ void HttpLibcurl::completeRequest(CURLM * multi_handle, CURL * handle, CURLcode curl_easy_cleanup(handle); op->mCurlHandle = NULL; - // Deliver to reply queue and release + // Retry or finalize + if (! op->mStatus) + { + // If this failed, we might want to retry. Have to inspect + // the status a little more deeply for those reasons worth retrying... 
+ if (op->mPolicyRetries < op->mPolicyRetryLimit && + ((op->mStatus.isHttpStatus() && op->mStatus.mType >= 499 && op->mStatus.mType <= 599) || + cant_connect == op->mStatus || + cant_res_proxy == op->mStatus || + cant_res_host == op->mStatus)) + { + // Okay, worth a retry. We include 499 in this test as + // it's the old 'who knows?' error from many grid services... + HttpPolicy & policy(mService->getPolicy()); + + policy.retryOp(op); + return HttpService::NORMAL; // Having pushed to retry, keep things running + } + } + + // This op is done, finalize it delivering it to the reply queue... + if (! op->mStatus) + { + LL_WARNS("CoreHttp") << "URL op failed after " << op->mPolicyRetries + << " retries. Reason: " << op->mStatus.toString() + << LL_ENDL; + } + else if (op->mPolicyRetries) + { + LL_WARNS("CoreHttp") << "URL op succeeded after " << op->mPolicyRetries << " retries." + << LL_ENDL; + } + op->stageFromActive(mService); op->release(); + return HttpService::REQUEST_SLEEP; } diff --git a/indra/llcorehttp/_httplibcurl.h b/indra/llcorehttp/_httplibcurl.h index ec325c1946..fe628b9ab0 100644 --- a/indra/llcorehttp/_httplibcurl.h +++ b/indra/llcorehttp/_httplibcurl.h @@ -83,7 +83,9 @@ public: protected: /// Invoked when libcurl has indicated a request has been processed /// to completion and we need to move the request to a new state. 
- void completeRequest(CURLM * multi_handle, CURL * handle, CURLcode status); + HttpService::ELoopSpeed completeRequest(CURLM * multi_handle, + CURL * handle, + CURLcode status); protected: typedef std::set active_set_t; diff --git a/indra/llcorehttp/_httpoprequest.cpp b/indra/llcorehttp/_httpoprequest.cpp index 4bdc4a5257..895629c514 100644 --- a/indra/llcorehttp/_httpoprequest.cpp +++ b/indra/llcorehttp/_httpoprequest.cpp @@ -99,8 +99,15 @@ HttpOpRequest::HttpOpRequest() mReplyBody(NULL), mReplyOffset(0), mReplyLength(0), - mReplyHeaders(NULL) -{} + mReplyHeaders(NULL), + mPolicyRetries(0), + mPolicyRetryAt(HttpTime(0)), + mPolicyRetryLimit(5) // *FIXME: Get from policy definitions +{ + // *NOTE: As members are added, retry initialization/cleanup + // may need to be extended in @prepareRequest(). +} + HttpOpRequest::~HttpOpRequest() @@ -130,7 +137,6 @@ HttpOpRequest::~HttpOpRequest() } mCurlService = NULL; - if (mCurlHeaders) { @@ -313,6 +319,30 @@ HttpStatus HttpOpRequest::setupPost(HttpRequest::policy_t policy_id, HttpStatus HttpOpRequest::prepareRequest(HttpService * service) { + // Scrub transport and result data for retried op case + mCurlActive = false; + mCurlHandle = NULL; + mCurlService = NULL; + if (mCurlHeaders) + { + curl_slist_free_all(mCurlHeaders); + mCurlHeaders = NULL; + } + mCurlBodyPos = 0; + + if (mReplyBody) + { + mReplyBody->release(); + mReplyBody = NULL; + } + mReplyOffset = 0; + mReplyLength = 0; + if (mReplyHeaders) + { + mReplyHeaders->release(); + mReplyHeaders = NULL; + } + // *FIXME: better error handling later HttpStatus status; @@ -321,6 +351,7 @@ HttpStatus HttpOpRequest::prepareRequest(HttpService * service) mCurlHandle = curl_easy_init(); // curl_easy_setopt(mCurlHandle, CURLOPT_VERBOSE, 1); + curl_easy_setopt(mCurlHandle, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); curl_easy_setopt(mCurlHandle, CURLOPT_TIMEOUT, 30); curl_easy_setopt(mCurlHandle, CURLOPT_CONNECTTIMEOUT, 30); curl_easy_setopt(mCurlHandle, CURLOPT_NOSIGNAL, 1); @@ 
-403,12 +434,7 @@ HttpStatus HttpOpRequest::prepareRequest(HttpService * service) break; } - if (mReqHeaders) - { - mCurlHeaders = append_headers_to_slist(mReqHeaders, mCurlHeaders); - } - mCurlHeaders = curl_slist_append(mCurlHeaders, "Pragma:"); - + // There's a CURLOPT for this now... if ((mReqOffset || mReqLength) && HOR_GET == mReqMethod) { static const char * const fmt1("Range: bytes=%lu-%lu"); @@ -428,6 +454,13 @@ HttpStatus HttpOpRequest::prepareRequest(HttpService * service) range_line[sizeof(range_line) - 1] = '\0'; mCurlHeaders = curl_slist_append(mCurlHeaders, range_line); } + + mCurlHeaders = curl_slist_append(mCurlHeaders, "Pragma:"); + if (mReqHeaders) + { + // Caller's headers last to override + mCurlHeaders = append_headers_to_slist(mReqHeaders, mCurlHeaders); + } curl_easy_setopt(mCurlHandle, CURLOPT_HTTPHEADER, mCurlHeaders); if (mProcFlags & (PF_SCAN_RANGE_HEADER | PF_SAVE_HEADERS)) diff --git a/indra/llcorehttp/_httpoprequest.h b/indra/llcorehttp/_httpoprequest.h index 0cad4e8459..6dcf30ca0c 100644 --- a/indra/llcorehttp/_httpoprequest.h +++ b/indra/llcorehttp/_httpoprequest.h @@ -128,6 +128,11 @@ public: off_t mReplyOffset; size_t mReplyLength; HttpHeaders * mReplyHeaders; + + // Policy data + int mPolicyRetries; + HttpTime mPolicyRetryAt; + const int mPolicyRetryLimit; }; // end class HttpOpRequest diff --git a/indra/llcorehttp/_httppolicy.cpp b/indra/llcorehttp/_httppolicy.cpp index 51f5e487dc..1f4cd34a4b 100644 --- a/indra/llcorehttp/_httppolicy.cpp +++ b/indra/llcorehttp/_httppolicy.cpp @@ -24,39 +24,46 @@ * $/LicenseInfo$ */ +#include "linden_common.h" + #include "_httppolicy.h" #include "_httpoprequest.h" #include "_httpservice.h" #include "_httplibcurl.h" +#include "lltimer.h" + namespace LLCore { HttpPolicy::HttpPolicy(HttpService * service) : mService(service) -{ - for (int policy_class(0); policy_class < HttpRequest::POLICY_CLASS_LIMIT; ++policy_class) - { - mReadyInClass[policy_class] = 0; - } -} +{} HttpPolicy::~HttpPolicy() { - 
for (int policy_class(0); policy_class < HttpRequest::POLICY_CLASS_LIMIT; ++policy_class) + for (int policy_class(0); policy_class < LL_ARRAY_SIZE(mState); ++policy_class) { - HttpReadyQueue & readyq(mReadyQueue[policy_class]); + HttpRetryQueue & retryq(mState[policy_class].mRetryQueue); + while (! retryq.empty()) + { + HttpOpRequest * op(retryq.top()); + op->cancel(); + op->release(); + retryq.pop(); + } + + HttpReadyQueue & readyq(mState[policy_class].mReadyQueue); while (! readyq.empty()) { HttpOpRequest * op(readyq.top()); op->cancel(); op->release(); - mReadyInClass[policy_class]--; readyq.pop(); } } @@ -68,27 +75,69 @@ void HttpPolicy::addOp(HttpOpRequest * op) { const int policy_class(op->mReqPolicy); - mReadyQueue[policy_class].push(op); - ++mReadyInClass[policy_class]; + op->mPolicyRetries = 0; + mState[policy_class].mReadyQueue.push(op); +} + + +void HttpPolicy::retryOp(HttpOpRequest * op) +{ + static const HttpTime retry_deltas[] = + { + 250000, // 1st retry in 0.25 S, etc... + 500000, + 1000000, + 2000000, + 5000000 // ... to every 5.0 S. + }; + static const int delta_max(int(LL_ARRAY_SIZE(retry_deltas)) - 1); + + const HttpTime now(totalTime()); + const int policy_class(op->mReqPolicy); + + const HttpTime delta(retry_deltas[llclamp(op->mPolicyRetries, 0, delta_max)]); + op->mPolicyRetryAt = now + delta; + ++op->mPolicyRetries; + LL_WARNS("CoreHttp") << "URL op retry #" << op->mPolicyRetries + << " being scheduled for " << delta << " uSecs from now." 
+ << LL_ENDL; + mState[policy_class].mRetryQueue.push(op); } HttpService::ELoopSpeed HttpPolicy::processReadyQueue() { + const HttpTime now(totalTime()); HttpService::ELoopSpeed result(HttpService::REQUEST_SLEEP); HttpLibcurl & transport(mService->getTransport()); - for (int policy_class(0); policy_class < HttpRequest::POLICY_CLASS_LIMIT; ++policy_class) + for (int policy_class(0); policy_class < LL_ARRAY_SIZE(mState); ++policy_class) { - HttpReadyQueue & readyq(mReadyQueue[policy_class]); int active(transport.getActiveCountInClass(policy_class)); int needed(8 - active); - if (needed > 0 && mReadyInClass[policy_class] > 0) + HttpRetryQueue & retryq(mState[policy_class].mRetryQueue); + HttpReadyQueue & readyq(mState[policy_class].mReadyQueue); + + if (needed > 0) { - // Scan ready queue for requests that match policy - - while (! readyq.empty() && needed > 0 && mReadyInClass[policy_class] > 0) + // First see if we have any retries... + while (needed > 0 && ! retryq.empty()) + { + HttpOpRequest * op(retryq.top()); + if (op->mPolicyRetryAt > now) + break; + + retryq.pop(); + + op->stageFromReady(mService); + op->release(); + + --needed; + } + + // Now go on to the new requests... + while (needed > 0 && ! readyq.empty()) { HttpOpRequest * op(readyq.top()); readyq.pop(); @@ -96,17 +145,16 @@ HttpService::ELoopSpeed HttpPolicy::processReadyQueue() op->stageFromReady(mService); op->release(); - --mReadyInClass[policy_class]; --needed; } } - - if (! readyq.empty()) + + if (! readyq.empty() || ! retryq.empty()) { // If anything is ready, continue looping... 
result = (std::min)(result, HttpService::NORMAL); } - } + } // end foreach policy_class return result; } @@ -114,9 +162,9 @@ HttpService::ELoopSpeed HttpPolicy::processReadyQueue() bool HttpPolicy::changePriority(HttpHandle handle, HttpRequest::priority_t priority) { - for (int policy_class(0); policy_class < HttpRequest::POLICY_CLASS_LIMIT; ++policy_class) + for (int policy_class(0); policy_class < LL_ARRAY_SIZE(mState); ++policy_class) { - HttpReadyQueue::container_type & c(mReadyQueue[policy_class].get_container()); + HttpReadyQueue::container_type & c(mState[policy_class].mReadyQueue.get_container()); // Scan ready queue for requests that match policy for (HttpReadyQueue::container_type::iterator iter(c.begin()); c.end() != iter;) @@ -126,9 +174,9 @@ bool HttpPolicy::changePriority(HttpHandle handle, HttpRequest::priority_t prior if (static_cast(*cur) == handle) { HttpOpRequest * op(*cur); - c.erase(cur); // All iterators are now invalidated + c.erase(cur); // All iterators are now invalidated op->mReqPriority = priority; - mReadyQueue[policy_class].push(op); // Re-insert using adapter class + mState[policy_class].mReadyQueue.push(op); // Re-insert using adapter class return true; } } diff --git a/indra/llcorehttp/_httppolicy.h b/indra/llcorehttp/_httppolicy.h index 425079ec63..6f18264f3d 100644 --- a/indra/llcorehttp/_httppolicy.h +++ b/indra/llcorehttp/_httppolicy.h @@ -31,6 +31,7 @@ #include "httprequest.h" #include "_httpservice.h" #include "_httpreadyqueue.h" +#include "_httpretryqueue.h" #include "_httppolicyglobal.h" @@ -67,6 +68,14 @@ public: /// additional references will be added.) void addOp(HttpOpRequest *); + /// Similar to addOp, used when a caller wants to retry a + /// request that has failed. It's placed on a special retry + /// queue but ordered by retry time not priority. Otherwise, + /// handling is the same and retried operations are considered + /// before new ones but that doesn't guarantee completion + /// order. 
+ void retryOp(HttpOpRequest *); + // Shadows HttpService's method bool changePriority(HttpHandle handle, HttpRequest::priority_t priority); @@ -77,10 +86,14 @@ public: return mGlobalOptions; } - protected: - int mReadyInClass[HttpRequest::POLICY_CLASS_LIMIT]; - HttpReadyQueue mReadyQueue[HttpRequest::POLICY_CLASS_LIMIT]; + struct State + { + HttpReadyQueue mReadyQueue; + HttpRetryQueue mRetryQueue; + }; + + State mState[HttpRequest::POLICY_CLASS_LIMIT]; HttpService * mService; // Naked pointer, not refcounted, not owner HttpPolicyGlobal mGlobalOptions; diff --git a/indra/llcorehttp/_httpreadyqueue.h b/indra/llcorehttp/_httpreadyqueue.h index 2cd96aefe3..87828834dc 100644 --- a/indra/llcorehttp/_httpreadyqueue.h +++ b/indra/llcorehttp/_httpreadyqueue.h @@ -36,8 +36,6 @@ namespace LLCore { -class HttpOpRequest; - /// HttpReadyQueue provides a simple priority queue for HttpOpRequest objects. /// /// This uses the priority_queue adaptor class to provide the queue diff --git a/indra/llcorehttp/_httpretryqueue.h b/indra/llcorehttp/_httpretryqueue.h new file mode 100644 index 0000000000..745adec09d --- /dev/null +++ b/indra/llcorehttp/_httpretryqueue.h @@ -0,0 +1,94 @@ +/** + * @file _httpretryqueue.h + * @brief Internal declaration for the operation retry queue + * + * $LicenseInfo:firstyear=2012&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2012, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef _LLCORE_HTTP_RETRY_QUEUE_H_ +#define _LLCORE_HTTP_RETRY_QUEUE_H_ + + +#include + +#include "_httpoprequest.h" + + +namespace LLCore +{ + +/// HttpRetryQueue provides a simple priority queue for HttpOpRequest objects. +/// +/// This uses the priority_queue adaptor class to provide the queue +/// as well as the ordering scheme while allowing us access to the +/// raw container if we follow a few simple rules. One of the more +/// important of those rules is that any iterator becomes invalid +/// on element erasure. So pay attention. +/// +/// Threading: not thread-safe. Expected to be used entirely by +/// a single thread, typically a worker thread of some sort. 
+
+/// Orders retry ops so that priority_queue::top() is the op with the earliest mPolicyRetryAt.
+struct HttpOpRetryCompare
+{
+	bool operator()(const HttpOpRequest * lhs, const HttpOpRequest * rhs) const
+	{
+		return lhs->mPolicyRetryAt > rhs->mPolicyRetryAt;	// '>' because std::priority_queue keeps the "largest" element on top
+	}
+};
+
+typedef std::priority_queue<HttpOpRequest *, std::deque<HttpOpRequest *>, LLCore::HttpOpRetryCompare> HttpRetryQueueBase;	// NOTE(review): template arguments were lost in extraction; std::deque assumed -- confirm
+
+class HttpRetryQueue : public HttpRetryQueueBase
+{
+public:
+	HttpRetryQueue()
+		: HttpRetryQueueBase()
+		{}
+
+	~HttpRetryQueue()
+		{}
+
+protected:
+	HttpRetryQueue(const HttpRetryQueue &);		// Not defined
+	void operator=(const HttpRetryQueue &);		// Not defined
+
+public:
+	const container_type & get_container() const
+		{
+			return c;
+		}
+
+	container_type & get_container()
+		{
+			return c;
+		}
+
+}; // end class HttpRetryQueue
+
+
+}  // end namespace LLCore
+
+
+#endif	// _LLCORE_HTTP_RETRY_QUEUE_H_
diff --git a/indra/llcorehttp/httpcommon.h b/indra/llcorehttp/httpcommon.h
index fd2661b700..42b75edb41 100644
--- a/indra/llcorehttp/httpcommon.h
+++ b/indra/llcorehttp/httpcommon.h
@@ -114,6 +114,9 @@ namespace LLCore
 typedef void * HttpHandle;
 #define LLCORE_HTTP_HANDLE_INVALID		(NULL)
 
+/// For internal scheduling and metrics, we use a microsecond
+/// timebase compatible with the environment.
+typedef U64 HttpTime;
 
 /// Error codes defined by the library itself as distinct from
 /// libcurl (or any other transport provider).
@@ -180,6 +183,15 @@ struct HttpStatus
 		  mStatus(status)
 		{}
 
+	HttpStatus(int http_status)
+		: mType(http_status),
+		  mStatus(http_status >= 200 && http_status <= 299
+				  ? HE_SUCCESS
+				  : HE_REPLY_ERROR)
+		{
+			llassert(http_status >= 100 && http_status <= 999);
+		}
+
 	HttpStatus(const HttpStatus & rhs)
 		: mType(rhs.mType),
 		  mStatus(rhs.mStatus)
diff --git a/indra/llcorehttp/tests/test_httprequest.hpp b/indra/llcorehttp/tests/test_httprequest.hpp
index 0e9d7d8979..2d91b95347 100644
--- a/indra/llcorehttp/tests/test_httprequest.hpp
+++ b/indra/llcorehttp/tests/test_httprequest.hpp
@@ -381,7 +381,7 @@ void HttpRequestTestObjectType::test<5>()
 
 		// Run the notification pump.
 		int count(0);
-		int limit(20);
+		int limit(180);				// With retries, can take more than 10 seconds to give up
 		while (count++ < limit && mHandlerCalls < 1)
 		{
 			req->update(1000);
-- 
cgit v1.2.3