From d666a3d92cb5dd9844c29e5472db542de7b5ac9e Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Thu, 18 Nov 2010 08:43:09 -0800 Subject: ESC-154 ESC-155 ESC-156 Asset fetch requests wrapped to allow for measurements. Asset fetch enqueues, dequeues and completion times recorded to asset stats collector. Texture fetch operations (http and udp) recorded to asset stats collector. Stats collector time value switched from F32 to U64 which is the more common type in the viewer. Cross-thread mechanism introduced to communicate region changes and generate global statistics messages. Facility to deliver metrics via Capabilities sketched in but needs additional work. Documentation and diagrams added. --- indra/newview/lltexturefetch.cpp | 488 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 483 insertions(+), 5 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index fafef84aa2..df99818ee9 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -49,6 +49,7 @@ #include "llviewertexture.h" #include "llviewerregion.h" #include "llviewerstats.h" +#include "llviewerassetstats.h" #include "llworld.h" ////////////////////////////////////////////////////////////////////////////// @@ -143,7 +144,7 @@ public: /*virtual*/ bool deleteOK(); // called from update() (WORK THREAD) ~LLTextureFetchWorker(); - void relese() { --mActiveCount; } + // void relese() { --mActiveCount; } S32 callbackHttpGet(const LLChannelDescriptors& channels, const LLIOPipe::buffer_ptr_t& buffer, @@ -161,9 +162,11 @@ public: mGetReason = reason; } - void setCanUseHTTP(bool can_use_http) {mCanUseHTTP = can_use_http;} - bool getCanUseHTTP()const {return mCanUseHTTP ;} + void setCanUseHTTP(bool can_use_http) { mCanUseHTTP = can_use_http; } + bool getCanUseHTTP() const { return mCanUseHTTP; } + LLTextureFetch & getFetcher() { return *mFetcher; } + protected: 
LLTextureFetchWorker(LLTextureFetch* fetcher, const std::string& url, const LLUUID& id, const LLHost& host, F32 priority, S32 discard, S32 size); @@ -277,6 +280,8 @@ private: S32 mLastPacket; U16 mTotalPackets; U8 mImageCodec; + + LLViewerAssetStats::duration_t mMetricsStartTime; }; ////////////////////////////////////////////////////////////////////////////// @@ -333,6 +338,18 @@ public: S32 data_size = worker->callbackHttpGet(channels, buffer, partial, success); mFetcher->removeFromHTTPQueue(mID, data_size); + + if (worker->mMetricsStartTime) + { + LLViewerAssetStatsFF::record_response_thread1(LLViewerAssetType::AT_TEXTURE, + true, + LLImageBase::TYPE_AVATAR_BAKE == worker->mType, + LLViewerAssetStatsFF::get_timestamp() - worker->mMetricsStartTime); + worker->mMetricsStartTime = 0; + } + LLViewerAssetStatsFF::record_dequeue_thread1(LLViewerAssetType::AT_TEXTURE, + true, + LLImageBase::TYPE_AVATAR_BAKE == worker->mType); } else { @@ -355,6 +372,201 @@ private: bool mFollowRedir; }; +////////////////////////////////////////////////////////////////////////////// + +// Cross-thread messaging for asset metrics. + +namespace +{ + +/** + * @brief Base class for cross-thread requests made of the fetcher + * + * I believe the intent of the LLQueuedThread class was to + * have these operations derived from LLQueuedThread::QueuedRequest + * but the texture fetcher has elected to manage the queue + * in its own manner. So these are free-standing objects which are + * managed in simple FIFO order on the mCommands queue of the + * LLTextureFetch object. + * + * What each represents is a simple command sent from an + * outside thread into the TextureFetch thread to be processed + * in order and in a timely fashion (though not an absolute + * higher priority than other operations of the thread). + * Each operation derives a new class from the base customizing + * members, constructors and the doWork() method to effect + * the command. + * + * The flow is one-directional. 
There are two global instances + * of the LLViewerAssetStats collector, one for the main program's + * thread pointed to by gViewerAssetStatsMain and one for the + * TextureFetch thread pointed to by gViewerAssetStatsThread1. + * Common operations has each thread recording metrics events + * into the respective collector unconcerned with locking and + * the state of any other thread. But when the agent moves into + * a different region or the metrics timer expires and a report + * needs to be sent back to the grid, messaging across grids + * is required to distribute data and perform global actions. + * In pseudo-UML, it looks like: + * + * Main Thread1 + * . . + * . . + * +-----+ . + * | AM | . + * +--+--+ . + * +-------+ | . + * | Main | +--+--+ . + * | | | SRE |---. . + * | Stats | +-----+ \ . + * | | | \ (uuid) +-----+ + * | Coll. | +--+--+ `-------->| SR | + * +-------+ | MSC | +--+--+ + * | ^ +-----+ | + * | | (uuid) / . +-----+ (uuid) + * | `--------' . | MSC |---------. + * | . +-----+ | + * | +-----+ . v + * | | TE | . +-------+ + * | +--+--+ . | Thd1 | + * | | . | | + * | (llsd) +-----+ . | Stats | + * `--------->| RSC | . | | + * +--+--+ . | Coll. | + * | . +-------+ + * +--+--+ . | + * | SME |---. . | + * +-----+ \ . | + * . \ (llsd) +-----+ | + * . `-------->| SM | | + * . +--+--+ | + * . | | + * . +-----+ (llsd) | + * . | RSC |<--------' + * . +-----+ + * . | + * . +-----+ + * . | CP |--> HTTP PUT + * . +-----+ + * . . + * . . + * + * + * Key: + * + * SRE - Set Region Enqueued. Enqueue a 'Set Region' command in + * the other thread providing the new UUID of the region. + * TFReqSetRegion carries the data. + * SR - Set Region. New region UUID is sent to the thread-local + * collector. + * SME - Send Metrics Enqueued. Enqueue a 'Send Metrics' command + * including an ownership transfer of an LLSD. + * TFReqSendMetrics carries the data. + * SM - Send Metrics. Global metrics reporting operation. 
Takes + * the remote LLSD from the command, merges it with and LLSD + * from the local collector and sends it to the grid. + * AM - Agent Moved. Agent has completed some sort of move to a + * new region. + * TE - Timer Expired. Metrics timer has expired (on the order + * of 10 minutes). + * CP - CURL Put + * MSC - Modify Stats Collector. State change in the thread-local + * collector. Typically a region change which affects the + * global pointers used to find the 'current stats'. + * RSC - Read Stats Collector. Extract collector data in LLSD form. + * + */ +class TFRequest // : public LLQueuedThread::QueuedRequest +{ +public: + // Default ctors and assignment operator are correct. + + virtual ~TFRequest() + {} + + virtual bool doWork(LLTextureFetchWorker * worker) = 0; +}; + + +/** + * @brief Implements a 'Set Region' cross-thread command. + * + * When an agent moves to a new region, subsequent metrics need + * to be binned into a new or existing stats collection in 1:1 + * relationship with the region. We communicate this region + * change across the threads involved in the communication with + * this message. + * + * Corresponds to LLTextureFetch::commandSetRegion() + */ +class TFReqSetRegion : public TFRequest +{ +public: + TFReqSetRegion(const LLUUID & region_id) + : TFRequest(), + mRegionID(region_id) + {} + TFReqSetRegion & operator=(const TFReqSetRegion &); // Not defined + + virtual ~TFReqSetRegion() + {} + + virtual bool doWork(LLTextureFetchWorker * worker); + +public: + const LLUUID mRegionID; +}; + + +/** + * @brief Implements a 'Send Metrics' cross-thread command. + * + * This is the big operation. The main thread gathers metrics + * for a period of minutes into LLViewerAssetStats and other + * objects then builds an LLSD to represent the data. 
It uses + * this command to transfer the LLSD, content *and* ownership, + * to the TextureFetch thread which adds its own metrics and + * kicks of an HTTP POST of the resulting data to the currently + * active metrics collector. + * + * Corresponds to LLTextureFetch::commandSendMetrics() + */ +class TFReqSendMetrics : public TFRequest +{ +public: + /** + * Construct the 'Send Metrics' command to have the TextureFetch + * thread add and log metrics data. + * + * @param caps_url URL of a "ViewerMetrics" Caps target + * to receive the data. Does not have to + * be associated with a particular region. + * + * @param report_main Pointer to LLSD containing main + * thread metrics. Ownership transfers + * to the new thread using very carefully + * constructed code. + */ + TFReqSendMetrics(const std::string & caps_url, + LLSD * report_main) + : TFRequest(), + mCapsURL(caps_url), + mReportMain(report_main) + {} + TFReqSendMetrics & operator=(const TFReqSendMetrics &); // Not defined + + virtual ~TFReqSendMetrics(); + + virtual bool doWork(LLTextureFetchWorker * worker); + +public: + const std::string mCapsURL; + LLSD * mReportMain; +}; + +} // end of anonymous namespace + + ////////////////////////////////////////////////////////////////////////////// //static @@ -374,6 +586,9 @@ const char* LLTextureFetchWorker::sStateDescs[] = { "DONE", }; +// static +volatile bool LLTextureFetch::svMetricsDataBreak(true); // Start with a data break + // called from MAIN THREAD LLTextureFetchWorker::LLTextureFetchWorker(LLTextureFetch* fetcher, @@ -423,7 +638,8 @@ LLTextureFetchWorker::LLTextureFetchWorker(LLTextureFetch* fetcher, mFirstPacket(0), mLastPacket(-1), mTotalPackets(0), - mImageCodec(IMG_CODEC_INVALID) + mImageCodec(IMG_CODEC_INVALID), + mMetricsStartTime(0) { mCanUseNET = mUrl.empty() ; @@ -591,6 +807,10 @@ bool LLTextureFetchWorker::doWork(S32 param) return true; // abort } } + + // Run a cross-thread command, if any. 
+ mFetcher->cmdDoWork(this); + if(mImagePriority < F_ALMOST_ZERO) { if (mState == INIT || mState == LOAD_FROM_NETWORK || mState == LOAD_FROM_SIMULATOR) @@ -800,7 +1020,15 @@ bool LLTextureFetchWorker::doWork(S32 param) mRequestedDiscard = mDesiredDiscard; mSentRequest = QUEUED; mFetcher->addToNetworkQueue(this); + if (! mMetricsStartTime) + { + mMetricsStartTime = LLViewerAssetStatsFF::get_timestamp(); + } + LLViewerAssetStatsFF::record_enqueue_thread1(LLViewerAssetType::AT_TEXTURE, + false, + LLImageBase::TYPE_AVATAR_BAKE == mType); setPriority(LLWorkerThread::PRIORITY_LOW | mWorkPriority); + return false; } else @@ -809,6 +1037,12 @@ bool LLTextureFetchWorker::doWork(S32 param) //llassert_always(mFetcher->mNetworkQueue.find(mID) != mFetcher->mNetworkQueue.end()); // Make certain this is in the network queue //mFetcher->addToNetworkQueue(this); + //if (! mMetricsStartTime) + //{ + // mMetricsStartTime = LLViewerAssetStatsFF::get_timestamp(); + //} + //LLViewerAssetStatsFF::record_enqueue_thread1(LLViewerAssetType::AT_TEXTURE, false, + // LLImageBase::TYPE_AVATAR_BAKE == mType); //setPriority(LLWorkerThread::PRIORITY_LOW | mWorkPriority); return false; } @@ -832,11 +1066,30 @@ bool LLTextureFetchWorker::doWork(S32 param) } setPriority(LLWorkerThread::PRIORITY_HIGH | mWorkPriority); mState = DECODE_IMAGE; - mWriteToCacheState = SHOULD_WRITE ; + mWriteToCacheState = SHOULD_WRITE; + + if (mMetricsStartTime) + { + LLViewerAssetStatsFF::record_response_thread1(LLViewerAssetType::AT_TEXTURE, + false, + LLImageBase::TYPE_AVATAR_BAKE == mType, + LLViewerAssetStatsFF::get_timestamp() - mMetricsStartTime); + mMetricsStartTime = 0; + } + LLViewerAssetStatsFF::record_dequeue_thread1(LLViewerAssetType::AT_TEXTURE, + false, + LLImageBase::TYPE_AVATAR_BAKE == mType); } else { mFetcher->addToNetworkQueue(this); // failsafe + if (! 
mMetricsStartTime) + { + mMetricsStartTime = LLViewerAssetStatsFF::get_timestamp(); + } + LLViewerAssetStatsFF::record_enqueue_thread1(LLViewerAssetType::AT_TEXTURE, + false, + LLImageBase::TYPE_AVATAR_BAKE == mType); setPriority(LLWorkerThread::PRIORITY_LOW | mWorkPriority); } return false; @@ -898,6 +1151,14 @@ bool LLTextureFetchWorker::doWork(S32 param) mState = WAIT_HTTP_REQ; mFetcher->addToHTTPQueue(mID); + if (! mMetricsStartTime) + { + mMetricsStartTime = LLViewerAssetStatsFF::get_timestamp(); + } + LLViewerAssetStatsFF::record_enqueue_thread1(LLViewerAssetType::AT_TEXTURE, + true, + LLImageBase::TYPE_AVATAR_BAKE == mType); + // Will call callbackHttpGet when curl request completes std::vector headers; headers.push_back("Accept: image/x-j2c"); @@ -1534,6 +1795,12 @@ LLTextureFetch::~LLTextureFetch() { clearDeleteList() ; + while (! mCommands.empty()) + { + delete mCommands.front(); + mCommands.erase(mCommands.begin()); + } + // ~LLQueuedThread() called here } @@ -1815,6 +2082,25 @@ bool LLTextureFetch::updateRequestPriority(const LLUUID& id, F32 priority) return res; } +// virtual +bool LLTextureFetch::runCondition() +{ + // Caller is holding the lock on LLThread's condition variable. + + // LLQueuedThread, unlike its base class LLThread, makes this a + // private method which is unfortunate. I want to use it directly + // but I'm going to have to re-implement the logic here (or change + // declarations, which I don't want to do right now). + + bool have_no_commands(false); + { + LLMutexLock lock(&mQueueMutex); + + have_no_commands = mCommands.empty(); + } + return ! 
(have_no_commands && mRequestQueue.empty() && mIdleThread); +} + ////////////////////////////////////////////////////////////////////////////// // MAIN THREAD @@ -2357,3 +2643,195 @@ void LLTextureFetch::dump() } } +////////////////////////////////////////////////////////////////////////////// + +// cross-thread command methods + +void LLTextureFetch::commandSetRegion(const LLUUID & region_id) +{ + TFReqSetRegion * req = new TFReqSetRegion(region_id); + + cmdEnqueue(req); +} + +void LLTextureFetch::commandSendMetrics(const std::string & caps_url, + LLSD * report_main) +{ + TFReqSendMetrics * req = new TFReqSendMetrics(caps_url, report_main); + + cmdEnqueue(req); +} + +void LLTextureFetch::commandDataBreak() +{ + // The pedantically correct way to implement this is to create a command + // request object in the above fashion and enqueue it. However, this is + // simple data of an advisorial not operational nature and this case + // of shared-write access is tolerable. + + LLTextureFetch::svMetricsDataBreak = true; +} + +void LLTextureFetch::cmdEnqueue(TFRequest * req) +{ + lockQueue(); + mCommands.push_back(req); + wake(); + unlockQueue(); +} + +TFRequest * LLTextureFetch::cmdDequeue() +{ + TFRequest * ret = 0; + + lockQueue(); + if (! mCommands.empty()) + { + ret = mCommands.front(); + mCommands.erase(mCommands.begin()); + } + unlockQueue(); + + return ret; +} + +void LLTextureFetch::cmdDoWork(LLTextureFetchWorker * worker) +{ + // Queue is expected to be locked here. + + if (mDebugPause) + { + return; // debug: don't do any work + } + + TFRequest * req = cmdDequeue(); + if (req) + { + // One request per pass should really be enough for this. + req->doWork(worker); + delete req; + } +} + + +////////////////////////////////////////////////////////////////////////////// + +// Private (anonymous) class methods implementing the command scheme. + +namespace +{ + +/** + * Implements the 'Set Region' command. 
+ * + * Thread: Thread1 (TextureFetch) + */ +bool +TFReqSetRegion::doWork(LLTextureFetchWorker *) +{ + LLViewerAssetStatsFF::set_region_thread1(mRegionID); + + return true; +} + + +TFReqSendMetrics::~TFReqSendMetrics() +{ + delete mReportMain; + mReportMain = 0; +} + + +/** + * Implements the 'Send Metrics' command. Takes over + * ownership of the passed LLSD pointer. + * + * Thread: Thread1 (TextureFetch) + */ +bool +TFReqSendMetrics::doWork(LLTextureFetchWorker * fetch_worker) +{ + /* + * HTTP POST responder. Doesn't do much but tries to + * detect simple breaks in recording the metrics stream. + * + * The 'volatile' modifiers don't indicate signals, + * mmap'd memory or threads, really. They indicate that + * the referenced data is part of a pseudo-closure for + * this responder rather than being required for correct + * operation. + */ + class lcl_responder : public LLCurl::Responder + { + public: + lcl_responder(volatile bool & post_failed, + volatile bool & post_succeeded) + : LLHTTPClient::Responder(), + mPostFailedStatus(post_failed), + mPostSucceededStatus(post_succeeded) + {} + + // virtual + void error(U32 status_num, const std::string & reason) + { + mPostFailedStatus = true; + } + + // virtual + void result(const LLSD & content) + { + mPostSucceededStatus = true; + } + + private: + volatile bool & mPostFailedStatus; + volatile bool & mPostSucceededStatus; + }; + + if (! gViewerAssetStatsThread1) + return true; + + if (! mCapsURL.empty()) + { + static volatile bool not_initial_report(false); + static S32 report_sequence(0); + + // We've already taken over ownership of the LLSD at this point + // and can do normal LLSD sharing operations at this point. But + // still being careful, regardless. + LLSD & envelope = *mReportMain; + { + envelope["sequence"] = report_sequence; + envelope["regions_alt"] = gViewerAssetStatsThread1->asLLSD(); + envelope["initial"] = ! 
not_initial_report; // Initial data from viewer + envelope["break"] = LLTextureFetch::svMetricsDataBreak; // Break in data prior to this report + + // *FIXME: Need to merge the two metrics streams here.... + } + + // Update sequence number and other metadata for next attempt. + if (S32_MAX == ++report_sequence) + report_sequence = 0; + LLTextureFetch::svMetricsDataBreak = false; + + LLCurlRequest::headers_t headers; + fetch_worker->getFetcher().getCurlRequest().post(mCapsURL, + headers, + envelope, + new lcl_responder(LLTextureFetch::svMetricsDataBreak, + not_initial_report)); + } + else + { + LLTextureFetch::svMetricsDataBreak = true; + } + + gViewerAssetStatsThread1->reset(); + + return true; +} + +} // end of anonymous namespace + + + -- cgit v1.2.3 From a99db82e9b3ce25bf2745721b57f0259a770b26a Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Fri, 19 Nov 2010 15:14:40 -0800 Subject: ESC-155 Multi-threaded umbrella collector for stats aggregation Code complete with the intelligence to merge counts, mins, maxes and means with reasonable defences. Added QAMode controls to the viewer so that we can QA this more quickly by reducing the timing interval and sending the metrics body to local logging as well as to the caps service. 
--- indra/newview/lltexturefetch.cpp | 67 +++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 29 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index df99818ee9..d303d425c8 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -2764,68 +2764,77 @@ TFReqSendMetrics::doWork(LLTextureFetchWorker * fetch_worker) class lcl_responder : public LLCurl::Responder { public: - lcl_responder(volatile bool & post_failed, - volatile bool & post_succeeded) + lcl_responder(volatile bool & reporting_break, + volatile bool & reporting_started) : LLHTTPClient::Responder(), - mPostFailedStatus(post_failed), - mPostSucceededStatus(post_succeeded) + mReportingBreak(reporting_break), + mReportingStarted(reporting_started) {} // virtual void error(U32 status_num, const std::string & reason) { - mPostFailedStatus = true; + mReportingBreak = true; } // virtual void result(const LLSD & content) { - mPostSucceededStatus = true; + mReportingBreak = false; + mReportingStarted = true; } private: - volatile bool & mPostFailedStatus; - volatile bool & mPostSucceededStatus; + volatile bool & mReportingBreak; + volatile bool & mReportingStarted; }; if (! gViewerAssetStatsThread1) return true; - if (! mCapsURL.empty()) - { - static volatile bool not_initial_report(false); - static S32 report_sequence(0); + static volatile bool reporting_started(false); + static S32 report_sequence(0); - // We've already taken over ownership of the LLSD at this point - // and can do normal LLSD sharing operations at this point. But - // still being careful, regardless. - LLSD & envelope = *mReportMain; - { - envelope["sequence"] = report_sequence; - envelope["regions_alt"] = gViewerAssetStatsThread1->asLLSD(); - envelope["initial"] = ! 
not_initial_report; // Initial data from viewer - envelope["break"] = LLTextureFetch::svMetricsDataBreak; // Break in data prior to this report + // We've already taken over ownership of the LLSD at this point + // and can do normal LLSD sharing operations at this point. But + // still being careful, regardless. + LLSD & main_stats = *mReportMain; - // *FIXME: Need to merge the two metrics streams here.... - } + LLSD thread1_stats = gViewerAssetStatsThread1->asLLSD(); // 'duration' & 'regions' from here + thread1_stats["message"] = "ViewerAssetMetrics"; + thread1_stats["sequence"] = report_sequence; + thread1_stats["initial"] = ! reporting_started; // Initial data from viewer + thread1_stats["break"] = LLTextureFetch::svMetricsDataBreak; // Break in data prior to this report + + // Update sequence number + if (S32_MAX == ++report_sequence) + report_sequence = 0; + + // Merge the two LLSDs into a single report + LLViewerAssetStatsFF::merge_stats(main_stats, thread1_stats); - // Update sequence number and other metadata for next attempt. - if (S32_MAX == ++report_sequence) - report_sequence = 0; - LLTextureFetch::svMetricsDataBreak = false; + // *TODO: Consider putting a report size limiter here. + if (! 
mCapsURL.empty()) + { LLCurlRequest::headers_t headers; fetch_worker->getFetcher().getCurlRequest().post(mCapsURL, headers, - envelope, + thread1_stats, new lcl_responder(LLTextureFetch::svMetricsDataBreak, - not_initial_report)); + reporting_started)); } else { LLTextureFetch::svMetricsDataBreak = true; } + // In QA mode, Metrics submode, log the result for ease of testing + if (gSavedSettings.getBOOL("QAMode") && gSavedSettings.getBOOL("QAModeMetricsSubmode")) + { + LL_INFOS("QAViewerMetrics") << thread1_stats << LL_ENDL; + } + gViewerAssetStatsThread1->reset(); return true; -- cgit v1.2.3 From f98a622325d8982d32ae98e189f5d3ec6ada183f Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Mon, 22 Nov 2010 10:26:25 -0800 Subject: ESC-154 ESC-156 Metrics integration into viewer's threads Removed declared but undefined interfaces from LLTextureFetch family. Had inserted the cross-thread command processor into some of LLTextureFetchWorker's processing which was unnatural and probably wrong. Moved it to LLTextureFetch which turned out to be far, far more natural. Better documentation on the asLLSD() format. Refined LLSD stats merger logic and enhanced unit tests to verify same. --- indra/newview/lltexturefetch.cpp | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index d303d425c8..e574a35479 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -405,7 +405,7 @@ namespace * into the respective collector unconcerned with locking and * the state of any other thread. But when the agent moves into * a different region or the metrics timer expires and a report - * needs to be sent back to the grid, messaging across grids + * needs to be sent back to the grid, messaging across threads * is required to distribute data and perform global actions. 
* In pseudo-UML, it looks like: * @@ -484,7 +484,11 @@ public: virtual ~TFRequest() {} - virtual bool doWork(LLTextureFetchWorker * worker) = 0; + // Patterned after QueuedRequest's method but expected behavior + // is different. Always expected to complete on the first call + // and work dispatcher will assume the same and delete the + // request after invocation. + virtual bool doWork(LLTextureFetch * fetcher) = 0; }; @@ -511,7 +515,7 @@ public: virtual ~TFReqSetRegion() {} - virtual bool doWork(LLTextureFetchWorker * worker); + virtual bool doWork(LLTextureFetch * fetcher); public: const LLUUID mRegionID; @@ -557,7 +561,7 @@ public: virtual ~TFReqSendMetrics(); - virtual bool doWork(LLTextureFetchWorker * worker); + virtual bool doWork(LLTextureFetch * fetcher); public: const std::string mCapsURL; @@ -808,9 +812,6 @@ bool LLTextureFetchWorker::doWork(S32 param) } } - // Run a cross-thread command, if any. - mFetcher->cmdDoWork(this); - if(mImagePriority < F_ALMOST_ZERO) { if (mState == INIT || mState == LOAD_FROM_NETWORK || mState == LOAD_FROM_SIMULATOR) @@ -2188,6 +2189,9 @@ void LLTextureFetch::threadedUpdate() } process_timer.reset(); + // Run a cross-thread command, if any. + cmdDoWork(); + // Update Curl on same thread as mCurlGetRequest was constructed S32 processed = mCurlGetRequest->process(); if (processed > 0) @@ -2695,22 +2699,22 @@ TFRequest * LLTextureFetch::cmdDequeue() return ret; } -void LLTextureFetch::cmdDoWork(LLTextureFetchWorker * worker) +void LLTextureFetch::cmdDoWork() { - // Queue is expected to be locked here. - if (mDebugPause) { return; // debug: don't do any work } + lockQueue(); TFRequest * req = cmdDequeue(); if (req) { // One request per pass should really be enough for this. 
- req->doWork(worker); + req->doWork(this); delete req; } + unlockQueue(); } @@ -2727,7 +2731,7 @@ namespace * Thread: Thread1 (TextureFetch) */ bool -TFReqSetRegion::doWork(LLTextureFetchWorker *) +TFReqSetRegion::doWork(LLTextureFetch *) { LLViewerAssetStatsFF::set_region_thread1(mRegionID); @@ -2749,7 +2753,7 @@ TFReqSendMetrics::~TFReqSendMetrics() * Thread: Thread1 (TextureFetch) */ bool -TFReqSendMetrics::doWork(LLTextureFetchWorker * fetch_worker) +TFReqSendMetrics::doWork(LLTextureFetch * fetcher) { /* * HTTP POST responder. Doesn't do much but tries to @@ -2818,11 +2822,11 @@ TFReqSendMetrics::doWork(LLTextureFetchWorker * fetch_worker) if (! mCapsURL.empty()) { LLCurlRequest::headers_t headers; - fetch_worker->getFetcher().getCurlRequest().post(mCapsURL, - headers, - thread1_stats, - new lcl_responder(LLTextureFetch::svMetricsDataBreak, - reporting_started)); + fetcher->getCurlRequest().post(mCapsURL, + headers, + thread1_stats, + new lcl_responder(LLTextureFetch::svMetricsDataBreak, + reporting_started)); } else { -- cgit v1.2.3 From 9ec3334184c71879c2f8bd0f27095b71c4302559 Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Tue, 23 Nov 2010 13:31:22 -0500 Subject: ESC-154 ESC-156 Data collection and control for viewer metrics Detect QAMode (and new QAModeMetricsSubmode) settings which enable logging of metrics report locally and a faster cycle time to reduce test waiting. Do this only in the main thread and propagate the result via collector constructors (will likely move that out and put it in llappviewer/lltexturefetch which is more correct scope). Managed to deadlock myself with a recursive mutex (sheesh). 
--- indra/newview/lltexturefetch.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index e574a35479..8e43084adb 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -2680,8 +2680,9 @@ void LLTextureFetch::cmdEnqueue(TFRequest * req) { lockQueue(); mCommands.push_back(req); - wake(); unlockQueue(); + + wake(); } TFRequest * LLTextureFetch::cmdDequeue() @@ -2706,7 +2707,6 @@ void LLTextureFetch::cmdDoWork() return; // debug: don't do any work } - lockQueue(); TFRequest * req = cmdDequeue(); if (req) { @@ -2714,7 +2714,6 @@ void LLTextureFetch::cmdDoWork() req->doWork(this); delete req; } - unlockQueue(); } @@ -2834,7 +2833,7 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) } // In QA mode, Metrics submode, log the result for ease of testing - if (gSavedSettings.getBOOL("QAMode") && gSavedSettings.getBOOL("QAModeMetricsSubmode")) + if (gViewerAssetStatsThread1->isQAMode()) { LL_INFOS("QAViewerMetrics") << thread1_stats << LL_ENDL; } -- cgit v1.2.3 From 3962b155b4939f831dfd82701d46c4f15aa9f7ac Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Tue, 23 Nov 2010 12:29:15 -0800 Subject: ESC-154 ESC-156 Integrating metrics collector into viewer. After discussions, renamed 'QAModeMetricsSubmode' to 'QAModeMetrics' and confirmed that LLCachedControl<> is the way to go. Moved the resulting flag out of LLViewerAssetStats (where it didn't belong) and it lives in both LLAppViewer and LLTextureFetch where it does belong. 
--- indra/newview/lltexturefetch.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index 8e43084adb..2e05a67791 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -1774,7 +1774,7 @@ bool LLTextureFetchWorker::writeToCacheComplete() ////////////////////////////////////////////////////////////////////////////// // public -LLTextureFetch::LLTextureFetch(LLTextureCache* cache, LLImageDecodeThread* imagedecodethread, bool threaded) +LLTextureFetch::LLTextureFetch(LLTextureCache* cache, LLImageDecodeThread* imagedecodethread, bool threaded, bool qa_mode) : LLWorkerThread("TextureFetch", threaded), mDebugCount(0), mDebugPause(FALSE), @@ -1786,7 +1786,8 @@ LLTextureFetch::LLTextureFetch(LLTextureCache* cache, LLImageDecodeThread* image mImageDecodeThread(imagedecodethread), mTextureBandwidth(0), mHTTPTextureBits(0), - mCurlGetRequest(NULL) + mCurlGetRequest(NULL), + mQAMode(qa_mode) { mMaxBandwidth = gSavedSettings.getF32("ThrottleBandwidthKBPS"); mTextureInfo.setUpLogging(gSavedSettings.getBOOL("LogTextureDownloadsToViewerLog"), gSavedSettings.getBOOL("LogTextureDownloadsToSimulator"), gSavedSettings.getU32("TextureLoggingThreshold")); @@ -2833,7 +2834,7 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) } // In QA mode, Metrics submode, log the result for ease of testing - if (gViewerAssetStatsThread1->isQAMode()) + if (fetcher->isQAMode()) { LL_INFOS("QAViewerMetrics") << thread1_stats << LL_ENDL; } -- cgit v1.2.3 From 0fd80d09972657e6417193abf577084a3b3b85f1 Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Wed, 24 Nov 2010 16:46:40 -0500 Subject: ESC-154 ESC-156 Metrics integration across threads Using unpause() method in derived class rather than wake() in furthest base class solved the stalling problem. 
I still think too many levels of the LLTextureFetch hierarchy are keeping thread state, however. The LLViewerRegion instance an agent enters doesn't necessarily have its region_id yet, that only comes after the handshake, if any. So add a few more metrics insertion points to propagate region into metrics. Finally, try to launch a final metrics report when a quit is initiated. --- indra/newview/lltexturefetch.cpp | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index 2e05a67791..2be3ba3280 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -2105,6 +2105,21 @@ bool LLTextureFetch::runCondition() ////////////////////////////////////////////////////////////////////////////// +// MAIN THREAD (unthreaded envs), WORKER THREAD (threaded envs) +void LLTextureFetch::commonUpdate() +{ + // Run a cross-thread command, if any. + cmdDoWork(); + + // Update Curl on same thread as mCurlGetRequest was constructed + S32 processed = mCurlGetRequest->process(); + if (processed > 0) + { + lldebugs << "processed: " << processed << " messages." << llendl; + } +} + + // MAIN THREAD //virtual S32 LLTextureFetch::update(U32 max_time_ms) @@ -2130,12 +2145,7 @@ S32 LLTextureFetch::update(U32 max_time_ms) if (!mThreaded) { - // Update Curl on same thread as mCurlGetRequest was constructed - S32 processed = mCurlGetRequest->process(); - if (processed > 0) - { - lldebugs << "processed: " << processed << " messages." << llendl; - } + commonUpdate(); } return res; @@ -2190,15 +2200,7 @@ void LLTextureFetch::threadedUpdate() } process_timer.reset(); - // Run a cross-thread command, if any. 
- cmdDoWork(); - - // Update Curl on same thread as mCurlGetRequest was constructed - S32 processed = mCurlGetRequest->process(); - if (processed > 0) - { - lldebugs << "processed: " << processed << " messages." << llendl; - } + commonUpdate(); #if 0 const F32 INFO_TIME = 1.0f; @@ -2657,6 +2659,7 @@ void LLTextureFetch::commandSetRegion(const LLUUID & region_id) TFReqSetRegion * req = new TFReqSetRegion(region_id); cmdEnqueue(req); + LL_INFOS("Texture") << "COMMANDING SET REGION" << LL_ENDL; } void LLTextureFetch::commandSendMetrics(const std::string & caps_url, @@ -2683,7 +2686,7 @@ void LLTextureFetch::cmdEnqueue(TFRequest * req) mCommands.push_back(req); unlockQueue(); - wake(); + unpause(); } TFRequest * LLTextureFetch::cmdDequeue() @@ -2818,7 +2821,7 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) LLViewerAssetStatsFF::merge_stats(main_stats, thread1_stats); // *TODO: Consider putting a report size limiter here. - LL_INFOS("Texture") << "PROCESSING SENDMETRICS REQUEST" << LL_ENDL; if (! mCapsURL.empty()) { LLCurlRequest::headers_t headers; -- cgit v1.2.3 From a4bf7322895cac318abc3ac0a000086d227fc2fe Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Wed, 24 Nov 2010 15:02:46 -0800 Subject: ESC-154 ESC-155 Viewer metrics fixes for min/max merged values, floating timestamps. The min/max response time calculations needed to be sensitive to the response counts to know if there was actual data. Failure to do so introduced a gratuitous min/max test against zero values which tended to corrupt the mins. Unit tests added to test for this condition. Finished conversion of times to floating point seconds. Removed two logging events used to debug the cross-thread messaging. Looks like a code completion point. 
--- indra/newview/lltexturefetch.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index 2be3ba3280..f5e2e35e1e 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -2659,7 +2659,6 @@ void LLTextureFetch::commandSetRegion(const LLUUID & region_id) TFReqSetRegion * req = new TFReqSetRegion(region_id); cmdEnqueue(req); - LL_INFOS("Texture") << "COMMANDING SET REGION" << LL_ENDL; } void LLTextureFetch::commandSendMetrics(const std::string & caps_url, @@ -2821,7 +2820,6 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) LLViewerAssetStatsFF::merge_stats(main_stats, thread1_stats); // *TODO: Consider putting a report size limiter here. - LL_INFOS("Texture") << "PROCESSING SENDMETRICS REQUEST" << LL_ENDL; if (! mCapsURL.empty()) { LLCurlRequest::headers_t headers; @@ -2839,7 +2837,7 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) // In QA mode, Metrics submode, log the result for ease of testing if (fetcher->isQAMode()) { - LL_INFOS("QAViewerMetrics") << thread1_stats << LL_ENDL; + LL_INFOS("Textures") << thread1_stats << LL_ENDL; } gViewerAssetStatsThread1->reset(); -- cgit v1.2.3 From 0f2ed092c5712cd5dcd928e079671df383227068 Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Mon, 29 Nov 2010 08:31:08 -0800 Subject: ESC-154 ESC-156 Now using region hash rather than region uuid as identifier. In the viewer, the region's UUID is acquired very late and isn't generally used as the canonical region identifier. The U64 region hash is a better and more consistently used identifier so I'm moving over to using that as the region key. Don't have a proper reserved invalid region hash which is unfortunate, but then, so much is. 
--- indra/newview/lltexturefetch.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index f5e2e35e1e..3793085e55 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -506,9 +506,9 @@ public: class TFReqSetRegion : public TFRequest { public: - TFReqSetRegion(const LLUUID & region_id) + TFReqSetRegion(U64 region_handle) : TFRequest(), - mRegionID(region_id) + mRegionHandle(region_handle) {} TFReqSetRegion & operator=(const TFReqSetRegion &); // Not defined @@ -518,7 +518,7 @@ public: virtual bool doWork(LLTextureFetch * fetcher); public: - const LLUUID mRegionID; + const U64 mRegionHandle; }; @@ -2654,9 +2654,9 @@ void LLTextureFetch::dump() // cross-thread command methods -void LLTextureFetch::commandSetRegion(const LLUUID & region_id) +void LLTextureFetch::commandSetRegion(U64 region_handle) { - TFReqSetRegion * req = new TFReqSetRegion(region_id); + TFReqSetRegion * req = new TFReqSetRegion(region_handle); cmdEnqueue(req); } @@ -2735,7 +2735,7 @@ namespace bool TFReqSetRegion::doWork(LLTextureFetch *) { - LLViewerAssetStatsFF::set_region_thread1(mRegionID); + LLViewerAssetStatsFF::set_region_thread1(mRegionHandle); return true; } @@ -2806,9 +2806,9 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) // still being careful, regardless. LLSD & main_stats = *mReportMain; - LLSD thread1_stats = gViewerAssetStatsThread1->asLLSD(); // 'duration' & 'regions' from here - thread1_stats["message"] = "ViewerAssetMetrics"; - thread1_stats["sequence"] = report_sequence; + LLSD thread1_stats = gViewerAssetStatsThread1->asLLSD(); // 'duration' & 'regions' from this LLSD + thread1_stats["message"] = "ViewerAssetMetrics"; // Identifies the type of metrics + thread1_stats["sequence"] = report_sequence; // Sequence number thread1_stats["initial"] = ! 
reporting_started; // Initial data from viewer thread1_stats["break"] = LLTextureFetch::svMetricsDataBreak; // Break in data prior to this report -- cgit v1.2.3 From 922b1f26b7279b5f54562c413c333463fe34473b Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Thu, 2 Dec 2010 18:42:47 -0500 Subject: ESC-211 Metrics data sink - fix delivery by viewer The TextureFetch thread was still stalling out due to a different path that determines whether there is work or not in the thread (uses getPending()) and that had to be harmonized with the changes to runCondition(). I'm not happy with this solution but a refactor of the LLThread tree isn't in the cards right now. --- indra/newview/lltexturefetch.cpp | 89 +++++++++++++++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 11 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index 3793085e55..dd84290e90 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -568,6 +568,14 @@ public: LLSD * mReportMain; }; +/* + * Count of POST requests outstanding. We maintain the count + * indirectly in the CURL request responder's ctor and dtor and + * use it when determining whether or not to sleep the flag. Can't + * use the LLCurl module's request counter as it isn't thread compatible. + */ +LLAtomic32 curl_post_request_count = 0; + } // end of anonymous namespace @@ -2084,6 +2092,33 @@ bool LLTextureFetch::updateRequestPriority(const LLUUID& id, F32 priority) return res; } +// Replicates and expands upon the base class's +// getPending() implementation. getPending() and +// runCondition() replicate one another's logic to +// an extent and are sometimes used for the same +// function (deciding whether or not to sleep/pause +// a thread). So the implementations need to stay +// in step, at least until this can be refactored and +// the redundancy eliminated. 
+// +// May be called from any thread + +//virtual +S32 LLTextureFetch::getPending() +{ + S32 res; + lockData(); + { + LLMutexLock lock(&mQueueMutex); + + res = mRequestQueue.size(); + res += curl_post_request_count; + res += mCommands.size(); + } + unlockData(); + return res; +} + // virtual bool LLTextureFetch::runCondition() { @@ -2100,7 +2135,12 @@ bool LLTextureFetch::runCondition() have_no_commands = mCommands.empty(); } - return ! (have_no_commands && mRequestQueue.empty() && mIdleThread); + + bool have_no_curl_requests(0 == curl_post_request_count); + + return ! (have_no_commands + && have_no_curl_requests + && (mRequestQueue.empty() && mIdleThread)); } ////////////////////////////////////////////////////////////////////////////// @@ -2116,7 +2156,7 @@ void LLTextureFetch::commonUpdate() if (processed > 0) { lldebugs << "processed: " << processed << " messages." << llendl; - } + } } @@ -2766,31 +2806,56 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) * the referenced data is part of a pseudo-closure for * this responder rather than being required for correct * operation. + * + * We don't try very hard with the POST request. We give + * it one shot and that's more-or-less it. With a proper + * refactoring of the LLQueuedThread usage, these POSTs + * could be put in a request object and made more reliable. 
*/ class lcl_responder : public LLCurl::Responder { public: - lcl_responder(volatile bool & reporting_break, + lcl_responder(S32 expected_sequence, + volatile const S32 & live_sequence, + volatile bool & reporting_break, volatile bool & reporting_started) - : LLHTTPClient::Responder(), + : LLCurl::Responder(), + mExpectedSequence(expected_sequence), + mLiveSequence(live_sequence), mReportingBreak(reporting_break), mReportingStarted(reporting_started) - {} + { + curl_post_request_count++; + } + + ~lcl_responder() + { + curl_post_request_count--; + } // virtual void error(U32 status_num, const std::string & reason) { - mReportingBreak = true; + if (mLiveSequence == mExpectedSequence) + { + mReportingBreak = true; + } } // virtual void result(const LLSD & content) { - mReportingBreak = false; - mReportingStarted = true; + if (mLiveSequence == mExpectedSequence) + { + mReportingBreak = false; + mReportingStarted = true; + } } + private: + S32 mExpectedSequence; + volatile const S32 & mLiveSequence; volatile bool & mReportingBreak; volatile bool & mReportingStarted; }; @@ -2799,8 +2864,8 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) return true; static volatile bool reporting_started(false); - static S32 report_sequence(0); - + static volatile S32 report_sequence(0); + // We've already taken over ownership of the LLSD at this point // and can do normal LLSD sharing operations at this point. But // still being careful, regardless. 
@@ -2826,7 +2891,9 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) fetcher->getCurlRequest().post(mCapsURL, headers, thread1_stats, - new lcl_responder(LLTextureFetch::svMetricsDataBreak, + new lcl_responder(report_sequence, + report_sequence, + LLTextureFetch::svMetricsDataBreak, reporting_started)); } else -- cgit v1.2.3 From ca76c55847cdaabe662c880c4d744916c8ca71ac Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Fri, 3 Dec 2010 12:31:12 -0800 Subject: ESC-211 ESC-222 Viewer/Sim comms and outbound data throttle Cleaned up some of the messaging code that sends the LLSD stats report off to the viewer. Added WARNS-level messages when there's a problem with delivery that will result in a data break. Users probably won't care. Added an outbound data throttle that limits stats to the 10 regions of longest occupancy. Should be a reasonable first cut. --- indra/newview/lltexturefetch.cpp | 75 ++++++++++++++++++++++++++++++++++------ 1 file changed, 65 insertions(+), 10 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index dd84290e90..b46f338303 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -27,6 +27,7 @@ #include "llviewerprecompiledheaders.h" #include +#include #include "llstl.h" @@ -446,7 +447,7 @@ namespace * . +-----+ * . | * . +-----+ - * . | CP |--> HTTP PUT + * . | CP |--> HTTP POST * . +-----+ * . . * . . @@ -469,7 +470,7 @@ namespace * new region. * TE - Timer Expired. Metrics timer has expired (on the order * of 10 minutes). - * CP - CURL Put + * CP - CURL Post * MSC - Modify Stats Collector. State change in the thread-local * collector. Typically a region change which affects the * global pointers used to find the 'current stats'. @@ -571,11 +572,23 @@ public: /* * Count of POST requests outstanding. 
We maintain the count * indirectly in the CURL request responder's ctor and dtor and - * use it when determining whether or not to sleep the flag. Can't + * use it when determining whether or not to sleep the thread. Can't * use the LLCurl module's request counter as it isn't thread compatible. */ LLAtomic32 curl_post_request_count = 0; - + +/* + * Examines the merged viewer metrics report and if found to be too long, + * will attempt to truncate it in some reasonable fashion. + * + * @param max_regions Limit of regions allowed in report. + * + * @param metrics Full, merged viewer metrics report. + * + * @returns If data was truncated, returns true. + */ +bool truncate_viewer_metrics(int max_regions, LLSD & metrics); + } // end of anonymous namespace @@ -2128,7 +2141,9 @@ bool LLTextureFetch::runCondition() // private method which is unfortunate. I want to use it directly // but I'm going to have to re-implement the logic here (or change // declarations, which I don't want to do right now). - + // + // Changes here may need to be reflected in getPending(). + bool have_no_commands(false); { LLMutexLock lock(&mQueueMutex); @@ -2139,8 +2154,8 @@ bool LLTextureFetch::runCondition() bool have_no_curl_requests(0 == curl_post_request_count); return ! (have_no_commands - && have_no_curl_requests - && (mRequestQueue.empty() && mIdleThread)); + && have_no_curl_requests + && (mRequestQueue.empty() && mIdleThread)); // From base class } ////////////////////////////////////////////////////////////////////////////// @@ -2840,6 +2855,8 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) { mReportingBreak = true; } + LL_WARNS("Texture") << "Break in metrics stream due to POST failure to metrics collection service. 
Reason: " + << reason << LL_ENDL; } // virtual @@ -2851,14 +2868,14 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) mReportingStarted = true; } } - private: S32 mExpectedSequence; volatile const S32 & mLiveSequence; volatile bool & mReportingBreak; volatile bool & mReportingStarted; - }; + + }; // class lcl_responder if (! gViewerAssetStatsThread1) return true; @@ -2884,7 +2901,9 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) // Merge the two LLSDs into a single report LLViewerAssetStatsFF::merge_stats(main_stats, thread1_stats); - // *TODO: Consider putting a report size limiter here. + // Limit the size of the stats report if necessary. + thread1_stats["truncated"] = truncate_viewer_metrics(10, thread1_stats); + if (! mCapsURL.empty()) { LLCurlRequest::headers_t headers; @@ -2912,6 +2931,42 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) return true; } + +bool +truncate_viewer_metrics(int max_regions, LLSD & metrics) +{ + static const LLSD::String reg_tag("regions"); + static const LLSD::String duration_tag("duration"); + + LLSD & reg_map(metrics[reg_tag]); + if (reg_map.size() <= max_regions) + { + return false; + } + + // Build map of region hashes ordered by duration + typedef std::multimap reg_ordered_list_t; + reg_ordered_list_t regions_by_duration; + + LLSD::map_const_iterator it_end(reg_map.endMap()); + for (LLSD::map_const_iterator it(reg_map.beginMap()); it_end != it; ++it) + { + LLSD::Integer duration = (it->second)[duration_tag].asInteger(); + regions_by_duration.insert(reg_ordered_list_t::value_type(duration, it->first)); + } + + // Erase excess region reports selecting shortest duration first + reg_ordered_list_t::const_iterator it2_end(regions_by_duration.end()); + reg_ordered_list_t::const_iterator it2(regions_by_duration.begin()); + int limit(regions_by_duration.size() - max_regions); + for (int i(0); i < limit && it2_end != it2; ++i, ++it2) + { + reg_map.erase(it2->second); + } + + return true; +} + } // end of anonymous 
namespace -- cgit v1.2.3 From 11d420dd32e643a191c16b04f2fbb42c2b4db628 Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Fri, 10 Dec 2010 17:41:05 -0800 Subject: Decided to refactor a bit. Was using LLSD as an internal data representation transferring ownership, doing data aggregation in a very pedantic way. That's just adding unneeded cost and complication. Used the same objects to transport data as are collecting it and everything got simpler, faster, easier to read with fewer gotchas. Bit myself *again* doing the min/max/mean merges but the unit tests were there to pick me up again. Added a per-region FPS metric while I was at it. This is much asked for and there was a convenient place to sample the value. --- indra/newview/lltexturefetch.cpp | 98 ++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 40 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index 73d78c9334..e1f9d7bdcc 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -442,18 +442,18 @@ namespace * | | TE | . +-------+ * | +--+--+ . | Thd1 | * | | . | | - * | (llsd) +-----+ . | Stats | + * | +-----+ . | Stats | * `--------->| RSC | . | | * +--+--+ . | Coll. | * | . +-------+ * +--+--+ . | * | SME |---. . | * +-----+ \ . | - * . \ (llsd) +-----+ | + * . \ (clone) +-----+ | * . `-------->| SM | | * . +--+--+ | * . | | - * . +-----+ (llsd) | + * . +-----+ | * . | RSC |<--------' * . +-----+ * . | @@ -472,11 +472,12 @@ namespace * SR - Set Region. New region UUID is sent to the thread-local * collector. * SME - Send Metrics Enqueued. Enqueue a 'Send Metrics' command - * including an ownership transfer of an LLSD. + * including an ownership transfer of a cloned LLViewerAssetStats. * TFReqSendMetrics carries the data. * SM - Send Metrics. Global metrics reporting operation.
Takes - * the remote LLSD from the command, merges it with and LLSD - * from the local collector and sends it to the grid. + * the cloned stats from the command, merges it with the + * thread's local stats, converts to LLSD and sends it on + * to the grid. * AM - Agent Moved. Agent has completed some sort of move to a * new region. * TE - Timer Expired. Metrics timer has expired (on the order @@ -485,7 +486,8 @@ namespace * MSC - Modify Stats Collector. State change in the thread-local * collector. Typically a region change which affects the * global pointers used to find the 'current stats'. - * RSC - Read Stats Collector. Extract collector data in LLSD form. + * RSC - Read Stats Collector. Extract collector data cloning it + * (i.e. deep copy) when necessary. * */ class TFRequest // : public LLQueuedThread::QueuedRequest @@ -539,11 +541,12 @@ public: * * This is the big operation. The main thread gathers metrics * for a period of minutes into LLViewerAssetStats and other - * objects then builds an LLSD to represent the data. It uses - * this command to transfer the LLSD, content *and* ownership, - * to the TextureFetch thread which adds its own metrics and - * kicks of an HTTP POST of the resulting data to the currently - * active metrics collector. + * objects then makes a snapshot of the data by cloning the + * collector. This command transfers the clone, along with a few + * additional arguments (UUIDs), handing ownership to the + * TextureFetch thread. It then merges its own data into the + * cloned copy, converts to LLSD and kicks off an HTTP POST of + * the resulting data to the currently active metrics collector. * * Corresponds to LLTextureFetch::commandSendMetrics() */ @@ -558,16 +561,24 @@ public: * to receive the data. Does not have to * be associated with a particular region. * - * @param report_main Pointer to LLSD containing main - * thread metrics. Ownership transfers - * to the new thread using very carefully - * constructed code. 
+ * @param session_id UUID of the agent's session. + * + * @param agent_id UUID of the agent. (Being pure here...) + * + * @param main_stats Pointer to a clone of the main thread's + * LLViewerAssetStats data. Thread1 takes + * ownership of the copy and disposes of it + * when done. */ TFReqSendMetrics(const std::string & caps_url, - LLSD * report_main) + const LLUUID & session_id, + const LLUUID & agent_id, + LLViewerAssetStats * main_stats) : TFRequest(), mCapsURL(caps_url), - mReportMain(report_main) + mSessionID(session_id), + mAgentID(agent_id), + mMainStats(main_stats) {} TFReqSendMetrics & operator=(const TFReqSendMetrics &); // Not defined @@ -577,7 +588,9 @@ public: public: const std::string mCapsURL; - LLSD * mReportMain; + const LLUUID mSessionID; + const LLUUID mAgentID; + LLViewerAssetStats * mMainStats; }; /* @@ -2727,9 +2740,11 @@ void LLTextureFetch::commandSetRegion(U64 region_handle) } void LLTextureFetch::commandSendMetrics(const std::string & caps_url, - LLSD * report_main) + const LLUUID & session_id, + const LLUUID & agent_id, + LLViewerAssetStats * main_stats) { - TFReqSendMetrics * req = new TFReqSendMetrics(caps_url, report_main); + TFReqSendMetrics * req = new TFReqSendMetrics(caps_url, session_id, agent_id, main_stats); cmdEnqueue(req); } @@ -2808,14 +2823,14 @@ TFReqSetRegion::doWork(LLTextureFetch *) TFReqSendMetrics::~TFReqSendMetrics() { - delete mReportMain; - mReportMain = 0; + delete mMainStats; + mMainStats = 0; } /** * Implements the 'Send Metrics' command. Takes over - * ownership of the passed LLSD pointer. + * ownership of the passed LLViewerAssetStats pointer. * * Thread: Thread1 (TextureFetch) */ @@ -2893,33 +2908,36 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) static volatile bool reporting_started(false); static volatile S32 report_sequence(0); - // We've already taken over ownership of the LLSD at this point - // and can do normal LLSD sharing operations at this point. But - // still being careful, regardless. 
- LLSD & main_stats = *mReportMain; - - LLSD thread1_stats = gViewerAssetStatsThread1->asLLSD(); // 'duration' & 'regions' from this LLSD - thread1_stats["message"] = "ViewerAssetMetrics"; // Identifies the type of metrics - thread1_stats["sequence"] = report_sequence; // Sequence number - thread1_stats["initial"] = ! reporting_started; // Initial data from viewer - thread1_stats["break"] = LLTextureFetch::svMetricsDataBreak; // Break in data prior to this report + // We've taken over ownership of the stats copy at this + // point. Get a working reference to it for merging here + // but leave it in 'this'. Destructor will rid us of it. + LLViewerAssetStats & main_stats = *mMainStats; + + // Merge existing stats into those from main, convert to LLSD + main_stats.merge(*gViewerAssetStatsThread1); + LLSD merged_llsd = main_stats.asLLSD(); + + // Add some additional meta fields to the content + merged_llsd["session_id"] = mSessionID; + merged_llsd["agent_id"] = mAgentID; + merged_llsd["message"] = "ViewerAssetMetrics"; // Identifies the type of metrics + merged_llsd["sequence"] = report_sequence; // Sequence number + merged_llsd["initial"] = ! reporting_started; // Initial data from viewer + merged_llsd["break"] = LLTextureFetch::svMetricsDataBreak; // Break in data prior to this report // Update sequence number if (S32_MAX == ++report_sequence) report_sequence = 0; - // Merge the two LLSDs into a single report - LLViewerAssetStatsFF::merge_stats(main_stats, thread1_stats); - // Limit the size of the stats report if necessary. - thread1_stats["truncated"] = truncate_viewer_metrics(10, thread1_stats); + merged_llsd["truncated"] = truncate_viewer_metrics(10, merged_llsd); if (! 
mCapsURL.empty()) { LLCurlRequest::headers_t headers; fetcher->getCurlRequest().post(mCapsURL, headers, - thread1_stats, + merged_llsd, new lcl_responder(report_sequence, report_sequence, LLTextureFetch::svMetricsDataBreak, @@ -2933,7 +2951,7 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) // In QA mode, Metrics submode, log the result for ease of testing if (fetcher->isQAMode()) { - LL_INFOS("Textures") << thread1_stats << LL_ENDL; + LL_INFOS("Textures") << merged_llsd << LL_ENDL; } gViewerAssetStatsThread1->reset(); -- cgit v1.2.3 From bb53d27b7ad6e7bb7b1871f103b221703d56e4d2 Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Sat, 11 Dec 2010 16:16:07 -0500 Subject: ESC-211 ESC-212 Use arrays in payload to grid and compact payload First, introduced a compact payload format that allows blocks of metrics to be dropped from the viewer->collector payload compressing 1200 bytes of LLSD into about 300, give-or-take. Then converted to using LLSD arrays in the payload to enumerate the regions encountered. This simplifies much data handling from the viewer all the way into the final formatter of the metrics on the grid. 
--- indra/newview/lltexturefetch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index e1f9d7bdcc..88905372f6 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -2915,7 +2915,7 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) // Merge existing stats into those from main, convert to LLSD main_stats.merge(*gViewerAssetStatsThread1); - LLSD merged_llsd = main_stats.asLLSD(); + LLSD merged_llsd = main_stats.asLLSD(true); // Add some additional meta fields to the content merged_llsd["session_id"] = mSessionID; -- cgit v1.2.3 From 622c9f772c5ca11d2c05c78e23761fae2467dd2f Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Mon, 13 Dec 2010 11:17:41 -0500 Subject: Cleanup a cross-thread command dtor. It was technically correct but looked a bit dodgy with pointer ownership. --- indra/newview/lltexturefetch.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index 88905372f6..e13fcf027f 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -1844,8 +1844,9 @@ LLTextureFetch::~LLTextureFetch() while (! mCommands.empty()) { - delete mCommands.front(); + TFRequest * req(mCommands.front()); mCommands.erase(mCommands.begin()); + delete req; } // ~LLQueuedThread() called here -- cgit v1.2.3 From de8fa40209300a92a595be59073a2f0cb258e15b Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Wed, 15 Dec 2010 15:50:09 -0500 Subject: ESC-235 Truncation of over-sized metrics reports wasn't working. Legacy of the LLSD::Map-to-LLSD::Array conversion, this ended up performing an erase on the array rather than the map taking out all the regions. So, there *was* a metrics report, it was just empty of regions. 
Fixed and scanned for more array/map problems and corrected the data type for duration sorts (should have been Real). --- indra/newview/lltexturefetch.cpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index e13fcf027f..25ad2fe717 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -2974,25 +2974,27 @@ truncate_viewer_metrics(int max_regions, LLSD & metrics) } // Build map of region hashes ordered by duration - typedef std::multimap reg_ordered_list_t; + typedef std::multimap reg_ordered_list_t; reg_ordered_list_t regions_by_duration; - LLSD::map_const_iterator it_end(reg_map.endMap()); - for (LLSD::map_const_iterator it(reg_map.beginMap()); it_end != it; ++it) + int ind(0); + LLSD::array_const_iterator it_end(reg_map.endArray()); + for (LLSD::array_const_iterator it(reg_map.beginArray()); it_end != it; ++it, ++ind) { - LLSD::Integer duration = (it->second)[duration_tag].asInteger(); - regions_by_duration.insert(reg_ordered_list_t::value_type(duration, it->first)); + LLSD::Real duration = (*it)[duration_tag].asReal(); + regions_by_duration.insert(reg_ordered_list_t::value_type(duration, ind)); } - // Erase excess region reports selecting shortest duration first - reg_ordered_list_t::const_iterator it2_end(regions_by_duration.end()); - reg_ordered_list_t::const_iterator it2(regions_by_duration.begin()); - int limit(regions_by_duration.size() - max_regions); - for (int i(0); i < limit && it2_end != it2; ++i, ++it2) + // Build a replacement regions array with the longest-persistence regions + LLSD new_region(LLSD::emptyArray()); + reg_ordered_list_t::const_reverse_iterator it2_end(regions_by_duration.rend()); + reg_ordered_list_t::const_reverse_iterator it2(regions_by_duration.rbegin()); + for (int i(0); i < max_regions && it2_end != it2; ++i, ++it2) { - 
reg_map.erase(it2->second); + new_region.append(reg_map[it2->second]); } - + reg_map = new_region; + return true; } -- cgit v1.2.3 From 3c05ebd28635e867f9726062b08cdbf4a7b53b22 Mon Sep 17 00:00:00 2001 From: Monty Brandenberg Date: Thu, 16 Dec 2010 16:42:26 -0800 Subject: ESC-237 No static init of LLAtomics and move TFRequest out of unnamed namespace. Linux startup crash appears to be due to static/global C++ init of LLAtomic types. The initializer with explicit value makes some runtime calls and it looks like these assume, at least on Linux, that apr_initialize() has been called. So move the static POST count to a member and provide accessors and increment/decrement. Command queue was built on a pointer to a class in anonymous namespace and that's not quite valid. Made it a nested class (really a nested forward declaration) while keeping the derived classes in anonymous. --- indra/newview/lltexturefetch.cpp | 42 ++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 23 deletions(-) (limited to 'indra/newview/lltexturefetch.cpp') diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp index 25ad2fe717..4f63abb152 100644 --- a/indra/newview/lltexturefetch.cpp +++ b/indra/newview/lltexturefetch.cpp @@ -388,9 +388,6 @@ private: // Cross-thread messaging for asset metrics. -namespace -{ - /** * @brief Base class for cross-thread requests made of the fetcher * @@ -490,7 +487,7 @@ namespace * (i.e. deep copy) when necessary. * */ -class TFRequest // : public LLQueuedThread::QueuedRequest +class LLTextureFetch::TFRequest // : public LLQueuedThread::QueuedRequest { public: // Default ctors and assignment operator are correct. @@ -505,6 +502,8 @@ public: virtual bool doWork(LLTextureFetch * fetcher) = 0; }; +namespace +{ /** * @brief Implements a 'Set Region' cross-thread command. 
@@ -517,11 +516,11 @@ public: * * Corresponds to LLTextureFetch::commandSetRegion() */ -class TFReqSetRegion : public TFRequest +class TFReqSetRegion : public LLTextureFetch::TFRequest { public: TFReqSetRegion(U64 region_handle) - : TFRequest(), + : LLTextureFetch::TFRequest(), mRegionHandle(region_handle) {} TFReqSetRegion & operator=(const TFReqSetRegion &); // Not defined @@ -550,7 +549,7 @@ public: * * Corresponds to LLTextureFetch::commandSendMetrics() */ -class TFReqSendMetrics : public TFRequest +class TFReqSendMetrics : public LLTextureFetch::TFRequest { public: /** @@ -574,7 +573,7 @@ public: const LLUUID & session_id, const LLUUID & agent_id, LLViewerAssetStats * main_stats) - : TFRequest(), + : LLTextureFetch::TFRequest(), mCapsURL(caps_url), mSessionID(session_id), mAgentID(agent_id), @@ -593,14 +592,6 @@ public: LLViewerAssetStats * mMainStats; }; -/* - * Count of POST requests outstanding. We maintain the count - * indirectly in the CURL request responder's ctor and dtor and - * use it when determining whether or not to sleep the thread. Can't - * use the LLCurl module's request counter as it isn't thread compatible. - */ -LLAtomic32 curl_post_request_count = 0; - /* * Examines the merged viewer metrics report and if found to be too long, * will attempt to truncate it in some reasonable fashion. 
@@ -1834,6 +1825,7 @@ LLTextureFetch::LLTextureFetch(LLTextureCache* cache, LLImageDecodeThread* image mCurlGetRequest(NULL), mQAMode(qa_mode) { + mCurlPOSTRequestCount = 0; mMaxBandwidth = gSavedSettings.getF32("ThrottleBandwidthKBPS"); mTextureInfo.setUpLogging(gSavedSettings.getBOOL("LogTextureDownloadsToViewerLog"), gSavedSettings.getBOOL("LogTextureDownloadsToSimulator"), gSavedSettings.getU32("TextureLoggingThreshold")); } @@ -2149,7 +2141,7 @@ S32 LLTextureFetch::getPending() LLMutexLock lock(&mQueueMutex); res = mRequestQueue.size(); - res += curl_post_request_count; + res += mCurlPOSTRequestCount; res += mCommands.size(); } unlockData(); @@ -2175,7 +2167,7 @@ bool LLTextureFetch::runCondition() have_no_commands = mCommands.empty(); } - bool have_no_curl_requests(0 == curl_post_request_count); + bool have_no_curl_requests(0 == mCurlPOSTRequestCount); return ! (have_no_commands && have_no_curl_requests @@ -2769,7 +2761,7 @@ void LLTextureFetch::cmdEnqueue(TFRequest * req) unpause(); } -TFRequest * LLTextureFetch::cmdDequeue() +LLTextureFetch::TFRequest * LLTextureFetch::cmdDequeue() { TFRequest * ret = 0; @@ -2856,22 +2848,24 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) class lcl_responder : public LLCurl::Responder { public: - lcl_responder(S32 expected_sequence, + lcl_responder(LLTextureFetch * fetcher, + S32 expected_sequence, volatile const S32 & live_sequence, volatile bool & reporting_break, volatile bool & reporting_started) : LLCurl::Responder(), + mFetcher(fetcher), mExpectedSequence(expected_sequence), mLiveSequence(live_sequence), mReportingBreak(reporting_break), mReportingStarted(reporting_started) { - curl_post_request_count++; + mFetcher->incrCurlPOSTCount(); } ~lcl_responder() { - curl_post_request_count--; + mFetcher->decrCurlPOSTCount(); } // virtual @@ -2896,6 +2890,7 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) } private: + LLTextureFetch * mFetcher; S32 mExpectedSequence; volatile const S32 & mLiveSequence; volatile 
bool & mReportingBreak; @@ -2939,7 +2934,8 @@ TFReqSendMetrics::doWork(LLTextureFetch * fetcher) fetcher->getCurlRequest().post(mCapsURL, headers, merged_llsd, - new lcl_responder(report_sequence, + new lcl_responder(fetcher, + report_sequence, report_sequence, LLTextureFetch::svMetricsDataBreak, reporting_started)); -- cgit v1.2.3