summaryrefslogtreecommitdiff
path: root/indra/llcommon/llfasttimer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llcommon/llfasttimer.cpp')
-rwxr-xr-xindra/llcommon/llfasttimer.cpp491
1 files changed, 491 insertions, 0 deletions
diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp
new file mode 100755
index 0000000000..3d28cd15b0
--- /dev/null
+++ b/indra/llcommon/llfasttimer.cpp
@@ -0,0 +1,491 @@
+/**
+ * @file llfasttimer.cpp
+ * @brief Implementation of the fast timer.
+ *
+ * $LicenseInfo:firstyear=2004&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+ * $/LicenseInfo$
+ */
+#include "linden_common.h"
+
+#include "llfasttimer.h"
+
+#include "llmemory.h"
+#include "llprocessor.h"
+#include "llsingleton.h"
+#include "lltreeiterators.h"
+#include "llsdserialize.h"
+#include "llunits.h"
+#include "llsd.h"
+#include "lltracerecording.h"
+#include "lltracethreadrecorder.h"
+
+#include <boost/bind.hpp>
+#include <queue>
+
+
+#if LL_WINDOWS
+#include "lltimer.h"
+#elif LL_LINUX || LL_SOLARIS
+#include <sys/time.h>
+#include <sched.h>
+#include "lltimer.h"
+#elif LL_DARWIN
+#include <sys/time.h>
+#include "lltimer.h" // get_clock_count()
+#else
+#error "architecture not supported"
+#endif
+
+namespace LLTrace
+{
+
+//////////////////////////////////////////////////////////////////////////////
+// statics
+
+bool BlockTimer::sLog = false;
+std::string BlockTimer::sLogName = "";
+bool BlockTimer::sMetricLog = false;
+
+#if LL_LINUX || LL_SOLARIS
+U64 BlockTimer::sClockResolution = 1000000000; // Nanosecond resolution
+#else
+U64 BlockTimer::sClockResolution = 1000000; // Microsecond resolution
+#endif
+
+static LLMutex* sLogLock = NULL;
+static std::queue<LLSD> sLogQueue;
+
+block_timer_tree_df_iterator_t begin_block_timer_tree_df(BlockTimerStatHandle& id)
+{
+ return block_timer_tree_df_iterator_t(&id,
+ boost::bind(boost::mem_fn(&BlockTimerStatHandle::beginChildren), _1),
+ boost::bind(boost::mem_fn(&BlockTimerStatHandle::endChildren), _1));
+}
+
+block_timer_tree_df_iterator_t end_block_timer_tree_df()
+{
+ return block_timer_tree_df_iterator_t();
+}
+
+block_timer_tree_df_post_iterator_t begin_block_timer_tree_df_post(BlockTimerStatHandle& id)
+{
+ return block_timer_tree_df_post_iterator_t(&id,
+ boost::bind(boost::mem_fn(&BlockTimerStatHandle::beginChildren), _1),
+ boost::bind(boost::mem_fn(&BlockTimerStatHandle::endChildren), _1));
+}
+
+block_timer_tree_df_post_iterator_t end_block_timer_tree_df_post()
+{
+ return block_timer_tree_df_post_iterator_t();
+}
+
+block_timer_tree_bf_iterator_t begin_block_timer_tree_bf(BlockTimerStatHandle& id)
+{
+ return block_timer_tree_bf_iterator_t(&id,
+ boost::bind(boost::mem_fn(&BlockTimerStatHandle::beginChildren), _1),
+ boost::bind(boost::mem_fn(&BlockTimerStatHandle::endChildren), _1));
+}
+
+block_timer_tree_bf_iterator_t end_block_timer_tree_bf()
+ {
+ return block_timer_tree_bf_iterator_t();
+ }
+
+block_timer_tree_df_iterator_t begin_timer_tree(BlockTimerStatHandle& id)
+ {
+ return block_timer_tree_df_iterator_t(&id,
+ boost::bind(boost::mem_fn(&BlockTimerStatHandle::beginChildren), _1),
+ boost::bind(boost::mem_fn(&BlockTimerStatHandle::endChildren), _1));
+ }
+
+block_timer_tree_df_iterator_t end_timer_tree()
+ {
+ return block_timer_tree_df_iterator_t();
+}
+
+
+// sort child timers by name
+struct SortTimerByName
+ {
+ bool operator()(const BlockTimerStatHandle* i1, const BlockTimerStatHandle* i2)
+ {
+ return i1->getName() < i2->getName();
+ }
+};
+
+static BlockTimerStatHandle sRootTimer("root", NULL);
+BlockTimerStatHandle& BlockTimer::getRootTimeBlock()
+{
+ return sRootTimer;
+ }
+
+void BlockTimer::pushLog(LLSD log)
+{
+ LLMutexLock lock(sLogLock);
+
+ sLogQueue.push(log);
+}
+
+void BlockTimer::setLogLock(LLMutex* lock)
+{
+ sLogLock = lock;
+}
+
+
+//static
+#if (LL_DARWIN || LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+U64 BlockTimer::countsPerSecond()
+{
+ return sClockResolution;
+}
+#else // windows or x86-mac or x86-linux or x86-solaris
+U64 BlockTimer::countsPerSecond()
+{
+#if LL_FASTTIMER_USE_RDTSC || !LL_WINDOWS
+ //getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz
+ static LLUnit<U64, LLUnits::Hertz> sCPUClockFrequency = LLProcessorInfo().getCPUFrequency();
+ return sCPUClockFrequency.value();
+#else
+ // If we're not using RDTSC, each fasttimer tick is just a performance counter tick.
+ // Not redefining the clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency())
+ // since that would change displayed MHz stats for CPUs
+ static bool firstcall = true;
+ static U64 sCPUClockFrequency;
+ if (firstcall)
+ {
+ QueryPerformanceFrequency((LARGE_INTEGER*)&sCPUClockFrequency);
+ firstcall = false;
+ }
+ return sCPUClockFrequency.value();
+#endif
+}
+#endif
+
+BlockTimerStatHandle::BlockTimerStatHandle(const char* name, const char* description)
+: StatType<TimeBlockAccumulator>(name, description)
+{}
+
+TimeBlockTreeNode& BlockTimerStatHandle::getTreeNode() const
+{
+ TimeBlockTreeNode* nodep = LLTrace::get_thread_recorder()->getTimeBlockTreeNode(getIndex());
+ llassert(nodep);
+ return *nodep;
+}
+
+
+void BlockTimer::bootstrapTimerTree()
+{
+ for (BlockTimerStatHandle::instance_tracker_t::instance_iter it = BlockTimerStatHandle::instance_tracker_t::beginInstances(), end_it = BlockTimerStatHandle::instance_tracker_t::endInstances();
+ it != end_it;
+ ++it)
+ {
+ BlockTimerStatHandle& timer = static_cast<BlockTimerStatHandle&>(*it);
+ if (&timer == &BlockTimer::getRootTimeBlock()) continue;
+
+ // bootstrap tree construction by attaching to last timer to be on stack
+ // when this timer was called
+ if (timer.getParent() == &BlockTimer::getRootTimeBlock())
+{
+ TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator();
+
+ if (accumulator.mLastCaller)
+ {
+ timer.setParent(accumulator.mLastCaller);
+ accumulator.mParent = accumulator.mLastCaller;
+ }
+ // no need to push up tree on first use, flag can be set spuriously
+ accumulator.mMoveUpTree = false;
+ }
+ }
+}
+
+// bump timers up tree if they have been flagged as being in the wrong place
+// do this in a bottom up order to promote descendants first before promoting ancestors
+// this preserves partial order derived from current frame's observations
+void BlockTimer::incrementalUpdateTimerTree()
+{
+ for(block_timer_tree_df_post_iterator_t it = begin_block_timer_tree_df_post(BlockTimer::getRootTimeBlock());
+ it != end_block_timer_tree_df_post();
+ ++it)
+ {
+ BlockTimerStatHandle* timerp = *it;
+
+ // sort timers by time last called, so call graph makes sense
+ TimeBlockTreeNode& tree_node = timerp->getTreeNode();
+ if (tree_node.mNeedsSorting)
+{
+ std::sort(tree_node.mChildren.begin(), tree_node.mChildren.end(), SortTimerByName());
+}
+
+ // skip root timer
+ if (timerp != &BlockTimer::getRootTimeBlock())
+{
+ TimeBlockAccumulator& accumulator = timerp->getCurrentAccumulator();
+
+ if (accumulator.mMoveUpTree)
+{
+ // since ancestors have already been visited, re-parenting won't affect tree traversal
+ //step up tree, bringing our descendants with us
+ LL_DEBUGS("FastTimers") << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() <<
+ " to child of " << timerp->getParent()->getParent()->getName() << LL_ENDL;
+ timerp->setParent(timerp->getParent()->getParent());
+ accumulator.mParent = timerp->getParent();
+ accumulator.mMoveUpTree = false;
+
+ // don't bubble up any ancestors until descendants are done bubbling up
+ // as ancestors may call this timer only on certain paths, so we want to resolve
+ // child-most block locations before their parents
+ it.skipAncestors();
+ }
+ }
+ }
+}
+
+
+void BlockTimer::updateTimes()
+ {
+ // walk up stack of active timers and accumulate current time while leaving timing structures active
+ BlockTimerStackRecord* stack_record = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance();
+ if (!stack_record) return;
+
+ U64 cur_time = getCPUClockCount64();
+ BlockTimer* cur_timer = stack_record->mActiveTimer;
+ TimeBlockAccumulator* accumulator = &stack_record->mTimeBlock->getCurrentAccumulator();
+
+ while(cur_timer
+ && cur_timer->mParentTimerData.mActiveTimer != cur_timer) // root defined by parent pointing to self
+ {
+ U64 cumulative_time_delta = cur_time - cur_timer->mStartTime;
+ cur_timer->mStartTime = cur_time;
+
+ accumulator->mTotalTimeCounter += cumulative_time_delta;
+ accumulator->mSelfTimeCounter += cumulative_time_delta - stack_record->mChildTime;
+ stack_record->mChildTime = 0;
+
+ stack_record = &cur_timer->mParentTimerData;
+ accumulator = &stack_record->mTimeBlock->getCurrentAccumulator();
+ cur_timer = stack_record->mActiveTimer;
+
+ stack_record->mChildTime += cumulative_time_delta;
+ }
+}
+
+static LLTrace::BlockTimerStatHandle FTM_PROCESS_TIMES("Process FastTimer Times");
+
+// not thread safe, so only call on main thread
+//static
+void BlockTimer::processTimes()
+{
+#if LL_TRACE_ENABLED
+ LL_RECORD_BLOCK_TIME(FTM_PROCESS_TIMES);
+ get_clock_count(); // good place to calculate clock frequency
+
+ // set up initial tree
+ bootstrapTimerTree();
+
+ incrementalUpdateTimerTree();
+
+ updateTimes();
+
+ // reset for next frame
+ for (BlockTimerStatHandle::instance_tracker_t::instance_iter it = BlockTimerStatHandle::instance_tracker_t::beginInstances(),
+ end_it = BlockTimerStatHandle::instance_tracker_t::endInstances();
+ it != end_it;
+ ++it)
+ {
+ BlockTimerStatHandle& timer = static_cast<BlockTimerStatHandle&>(*it);
+ TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator();
+
+ accumulator.mLastCaller = NULL;
+ accumulator.mMoveUpTree = false;
+ }
+#endif
+}
+
+std::vector<BlockTimerStatHandle*>::iterator BlockTimerStatHandle::beginChildren()
+ {
+ return getTreeNode().mChildren.begin();
+ }
+
+std::vector<BlockTimerStatHandle*>::iterator BlockTimerStatHandle::endChildren()
+ {
+ return getTreeNode().mChildren.end();
+}
+
+std::vector<BlockTimerStatHandle*>& BlockTimerStatHandle::getChildren()
+{
+ return getTreeNode().mChildren;
+ }
+
+bool BlockTimerStatHandle::hasChildren()
+{
+ return ! getTreeNode().mChildren.empty();
+}
+
+// static
+void BlockTimer::logStats()
+{
+ // get ready for next frame
+ if (sLog)
+ { //output current frame counts to performance log
+
+ static S32 call_count = 0;
+ if (call_count % 100 == 0)
+ {
+ LL_DEBUGS("FastTimers") << "countsPerSecond: " << countsPerSecond() << LL_ENDL;
+ LL_DEBUGS("FastTimers") << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << LL_ENDL;
+ LL_DEBUGS("FastTimers") << "getCPUClockCount32() " << getCPUClockCount32() << LL_ENDL;
+ LL_DEBUGS("FastTimers") << "getCPUClockCount64() " << getCPUClockCount64() << LL_ENDL;
+ LL_DEBUGS("FastTimers") << "elapsed sec " << ((F64)getCPUClockCount64() / (F64HertzImplicit)LLProcessorInfo().getCPUFrequency()) << LL_ENDL;
+ }
+ call_count++;
+
+ F64Seconds total_time(0);
+ LLSD sd;
+
+ {
+ for (BlockTimerStatHandle::instance_tracker_t::instance_iter it = BlockTimerStatHandle::instance_tracker_t::beginInstances(),
+ end_it = BlockTimerStatHandle::instance_tracker_t::endInstances();
+ it != end_it;
+ ++it)
+ {
+ BlockTimerStatHandle& timer = static_cast<BlockTimerStatHandle&>(*it);
+ LLTrace::PeriodicRecording& frame_recording = LLTrace::get_frame_recording();
+ sd[timer.getName()]["Time"] = (LLSD::Real) (frame_recording.getLastRecording().getSum(timer).value());
+ sd[timer.getName()]["Calls"] = (LLSD::Integer) (frame_recording.getLastRecording().getSum(timer.callCount()));
+
+ // computing total time here because getting the root timer's getCountHistory
+ // doesn't work correctly on the first frame
+ total_time += frame_recording.getLastRecording().getSum(timer);
+ }
+ }
+
+ sd["Total"]["Time"] = (LLSD::Real) total_time.value();
+ sd["Total"]["Calls"] = (LLSD::Integer) 1;
+
+ {
+ LLMutexLock lock(sLogLock);
+ sLogQueue.push(sd);
+ }
+ }
+
+}
+
+//static
+void BlockTimer::dumpCurTimes()
+{
+ LLTrace::PeriodicRecording& frame_recording = LLTrace::get_frame_recording();
+ LLTrace::Recording& last_frame_recording = frame_recording.getLastRecording();
+
+ // walk over timers in depth order and output timings
+ for(block_timer_tree_df_iterator_t it = begin_timer_tree(BlockTimer::getRootTimeBlock());
+ it != end_timer_tree();
+ ++it)
+ {
+ BlockTimerStatHandle* timerp = (*it);
+ F64Seconds total_time = last_frame_recording.getSum(*timerp);
+ U32 num_calls = last_frame_recording.getSum(timerp->callCount());
+
+ // Don't bother with really brief times, keep output concise
+ if (total_time < F32Milliseconds(0.1f)) continue;
+
+ std::ostringstream out_str;
+ BlockTimerStatHandle* parent_timerp = timerp;
+ while(parent_timerp && parent_timerp != parent_timerp->getParent())
+ {
+ out_str << "\t";
+ parent_timerp = parent_timerp->getParent();
+ }
+
+ out_str << timerp->getName() << " "
+ << std::setprecision(3) << total_time.valueInUnits<LLUnits::Milliseconds>() << " ms, "
+ << num_calls << " calls";
+
+ LL_INFOS() << out_str.str() << LL_ENDL;
+}
+}
+
+//static
+void BlockTimer::writeLog(std::ostream& os)
+{
+ while (!sLogQueue.empty())
+ {
+ LLSD& sd = sLogQueue.front();
+ LLSDSerialize::toXML(sd, os);
+ LLMutexLock lock(sLogLock);
+ sLogQueue.pop();
+ }
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// TimeBlockAccumulator
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+TimeBlockAccumulator::TimeBlockAccumulator()
+: mTotalTimeCounter(0),
+ mSelfTimeCounter(0),
+ mCalls(0),
+ mLastCaller(NULL),
+ mActiveCount(0),
+ mMoveUpTree(false),
+ mParent(NULL)
+{}
+
+void TimeBlockAccumulator::addSamples( const TimeBlockAccumulator& other, EBufferAppendType append_type )
+{
+#if LL_TRACE_ENABLED
+ // we can't merge two unrelated time block samples, as that will screw with the nested timings
+ // due to the call hierarchy of each thread
+ llassert(append_type == SEQUENTIAL);
+ mTotalTimeCounter += other.mTotalTimeCounter;
+ mSelfTimeCounter += other.mSelfTimeCounter;
+ mCalls += other.mCalls;
+ mLastCaller = other.mLastCaller;
+ mActiveCount = other.mActiveCount;
+ mMoveUpTree = other.mMoveUpTree;
+ mParent = other.mParent;
+#endif
+}
+
+void TimeBlockAccumulator::reset( const TimeBlockAccumulator* other )
+{
+ mCalls = 0;
+ mSelfTimeCounter = 0;
+ mTotalTimeCounter = 0;
+
+ if (other)
+{
+ mLastCaller = other->mLastCaller;
+ mActiveCount = other->mActiveCount;
+ mMoveUpTree = other->mMoveUpTree;
+ mParent = other->mParent;
+ }
+}
+
+F64Seconds BlockTimer::getElapsedTime()
+{
+ U64 total_time = getCPUClockCount64() - mStartTime;
+
+ return F64Seconds((F64)total_time / (F64)BlockTimer::countsPerSecond());
+}
+
+
+} // namespace LLTrace