114 files changed, 5083 insertions, 2744 deletions
diff --git a/indra/cmake/Copy3rdPartyLibs.cmake b/indra/cmake/Copy3rdPartyLibs.cmake
index b20d23cead..1c56f49486 100644
--- a/indra/cmake/Copy3rdPartyLibs.cmake
+++ b/indra/cmake/Copy3rdPartyLibs.cmake
@@ -104,6 +104,8 @@ if(WINDOWS)
         set(MSVC_VER 120)
     elseif (MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) # Visual Studio 2017
         set(MSVC_VER 140)
+    elseif (MSVC_VERSION GREATER_EQUAL 1920 AND MSVC_VERSION LESS 1930) # Visual Studio 2019
+        set(MSVC_VER 140)
     else (MSVC80)
         MESSAGE(WARNING "New MSVC_VERSION ${MSVC_VERSION} of MSVC: adapt Copy3rdPartyLibs.cmake")
     endif (MSVC80)
diff --git a/indra/cmake/LLCommon.cmake b/indra/cmake/LLCommon.cmake
index 8900419f9b..34499aaa36 100644
--- a/indra/cmake/LLCommon.cmake
+++ b/indra/cmake/LLCommon.cmake
@@ -3,12 +3,14 @@
 include(APR)
 include(Boost)
 include(EXPAT)
+include(Tracy)
 include(ZLIB)
 
 set(LLCOMMON_INCLUDE_DIRS
     ${LIBS_OPEN_DIR}/llcommon
     ${APRUTIL_INCLUDE_DIR}
     ${APR_INCLUDE_DIR}
+    ${TRACY_INCLUDE_DIR}
     )
 set(LLCOMMON_SYSTEM_INCLUDE_DIRS
     ${Boost_INCLUDE_DIRS}
@@ -30,7 +32,8 @@ else (LINUX)
         ${BOOST_FIBER_LIBRARY} 
         ${BOOST_CONTEXT_LIBRARY} 
         ${BOOST_THREAD_LIBRARY} 
-        ${BOOST_SYSTEM_LIBRARY} )
+        ${BOOST_SYSTEM_LIBRARY}
+        )
 endif (LINUX)
 
 set(LLCOMMON_LINK_SHARED OFF CACHE BOOL "Build the llcommon target as a static library.")
diff --git a/indra/cmake/Tracy.cmake b/indra/cmake/Tracy.cmake
new file mode 100644
index 0000000000..1b8c3db2e2
--- /dev/null
+++ b/indra/cmake/Tracy.cmake
@@ -0,0 +1,29 @@
+# -*- cmake -*-
+include(Prebuilt)
+
+set(USE_TRACY OFF CACHE BOOL "Use Tracy profiler.")
+
+# Tracy.cmake should not set LLCOMMON_INCLUDE_DIRS, let LLCommon.cmake do that.
+# TRACY_LIBRARY is empty either way; set it unconditionally so it is always defined.
+set(TRACY_LIBRARY "")
+
+if (USE_TRACY)
+  set(TRACY_INCLUDE_DIR ${LIBS_PREBUILT_DIR}/include/tracy)
+
+  # 2 is LL_PROFILER_CONFIG_TRACY. See: indra/llcommon/llprofiler.h
+  add_definitions(-DLL_PROFILER_CONFIGURATION=2)
+  use_prebuilt_binary(tracy)
+
+  if (WINDOWS)
+    message(STATUS "Including Tracy for Windows: '${TRACY_INCLUDE_DIR}'")
+  endif (WINDOWS)
+  if (DARWIN)
+    message(STATUS "Including Tracy for Darwin: '${TRACY_INCLUDE_DIR}'")
+  endif (DARWIN)
+  if (LINUX)
+    message(STATUS "Including Tracy for Linux: '${TRACY_INCLUDE_DIR}'")
+  endif (LINUX)
+else (USE_TRACY)
+  set(TRACY_INCLUDE_DIR "")
+endif (USE_TRACY)
+
diff --git a/indra/edit-me-to-trigger-new-build.txt b/indra/edit-me-to-trigger-new-build.txt
index 5366987cff..eab7c17b71 100644
--- a/indra/edit-me-to-trigger-new-build.txt
+++ b/indra/edit-me-to-trigger-new-build.txt
@@ -1,3 +1,4 @@
 euclid 5/29/2020
 euclid 7/23/2020
-euclid 4/29/2021
-\ No newline at end of file
+euclid 4/29/2021
+euclid 10/5/2021 DRTVWR-546
diff --git a/indra/llcharacter/lljoint.cpp b/indra/llcharacter/lljoint.cpp
index dee642310e..d72282ab42 100644
--- a/indra/llcharacter/lljoint.cpp
+++ b/indra/llcharacter/lljoint.cpp
@@ -922,6 +922,13 @@ const LLMatrix4 &LLJoint::getWorldMatrix()
 	return mXform.getWorldMatrix();
 }
 
+const LLMatrix4a& LLJoint::getWorldMatrix4a()
+{
+    updateWorldMatrixParent();
+    // mWorldMatrix is the LLMatrix4a copy that updateWorldMatrix() loads from mXform
+    return mWorldMatrix;
+}
+
 
 //--------------------------------------------------------------------
 // setWorldMatrix()
@@ -1003,6 +1010,7 @@ void LLJoint::updateWorldMatrix()
 	{
 		sNumUpdates++;
 		mXform.updateMatrix(FALSE);
+        mWorldMatrix.loadu(mXform.getWorldMatrix());
 		mDirtyFlags = 0x0;
 	}
 }
diff --git a/indra/llcharacter/lljoint.h b/indra/llcharacter/lljoint.h
index 1b646b641f..ba821667c7 100644
--- a/indra/llcharacter/lljoint.h
+++ b/indra/llcharacter/lljoint.h
@@ -38,6 +38,7 @@
 #include "m4math.h"
 #include "llquaternion.h"
 #include "xform.h"
+#include "llmatrix4a.h"
 
 const S32 LL_CHARACTER_MAX_JOINTS_PER_MESH = 15;
 // Need to set this to count of animate-able joints,
@@ -123,6 +124,7 @@ protected:
 
 	// explicit transformation members
 	LLXformMatrix		mXform;
+    LLMatrix4a          mWorldMatrix;
 
     LLVector3       mDefaultPosition;
     LLVector3       mDefaultScale;
@@ -259,6 +261,8 @@ public:
 	const LLMatrix4 &getWorldMatrix();
 	void setWorldMatrix( const LLMatrix4& mat );
 
+    const LLMatrix4a& getWorldMatrix4a();
+
 	void updateWorldMatrixChildren();
 	void updateWorldMatrixParent();
 
diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt
index 22dfe12e40..78d6ea3090 100644
--- a/indra/llcommon/CMakeLists.txt
+++ b/indra/llcommon/CMakeLists.txt
@@ -12,6 +12,7 @@ include(JsonCpp)
 include(Copy3rdPartyLibs)
 include(ZLIB)
 include(URIPARSER)
+include(Tracy)
 
 include_directories(
     ${EXPAT_INCLUDE_DIRS}
@@ -19,6 +20,7 @@ include_directories(
     ${JSONCPP_INCLUDE_DIR}
     ${ZLIB_INCLUDE_DIRS}
     ${URIPARSER_INCLUDE_DIRS}
+    ${TRACY_INCLUDE_DIR}
     )
 
 # add_executable(lltreeiterators lltreeiterators.cpp)
@@ -117,14 +119,16 @@ set(llcommon_SOURCE_FILES
     lluriparser.cpp
     lluuid.cpp
     llworkerthread.cpp
-    timing.cpp
     u64.cpp
+    threadpool.cpp
+    workqueue.cpp
     StackWalker.cpp
     )
     
 set(llcommon_HEADER_FILES
     CMakeLists.txt
 
+    chrono.h
     ctype_workaround.h
     fix_macros.h
     indra_constants.h
@@ -197,6 +201,7 @@ set(llcommon_HEADER_FILES
     llmortician.h
     llnametable.h
     llpointer.h
+    llprofiler.h
     llpounceable.h
     llpredicate.h
     llpreprocessor.h
@@ -251,8 +256,12 @@ set(llcommon_HEADER_FILES
     lockstatic.h
     stdtypes.h
     stringize.h
+    threadpool.h
+    threadsafeschedule.h
     timer.h
+    tuple.h
     u64.h
+    workqueue.h
     StackWalker.h
     )
 
@@ -299,6 +308,7 @@ target_link_libraries(
     ${BOOST_SYSTEM_LIBRARY}
     ${GOOGLE_PERFTOOLS_LIBRARIES}
     ${URIPARSER_LIBRARIES}
+    ${TRACY_LIBRARY}
     )
 
 if (DARWIN)
@@ -355,6 +365,9 @@ if (LL_TESTS)
   LL_ADD_INTEGRATION_TEST(lluri "" "${test_libs}")
   LL_ADD_INTEGRATION_TEST(llunits "" "${test_libs}")
   LL_ADD_INTEGRATION_TEST(stringize "" "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(threadsafeschedule "" "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(tuple "" "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(workqueue "" "${test_libs}")
 
 ## llexception_test.cpp isn't a regression test, and doesn't need to be run
 ## every build. It's to help a developer make implementation choices about
diff --git a/indra/llcommon/chrono.h b/indra/llcommon/chrono.h
new file mode 100644
index 0000000000..806e871892
--- /dev/null
+++ b/indra/llcommon/chrono.h
@@ -0,0 +1,65 @@
+/**
+ * @file   chrono.h
+ * @author Nat Goodspeed
+ * @date   2021-10-05
+ * @brief  supplement <chrono> with utility functions
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_CHRONO_H)
+#define LL_CHRONO_H
+
+#include <chrono>
+#include <type_traits>              // std::enable_if
+
+namespace LL
+{
+
+// time_point_cast() is derived from https://stackoverflow.com/a/35293183
+// without the iteration: we think errors in the ~1 microsecond range are
+// probably acceptable.
+
+// This variant is for the optimal case when the source and dest use the same
+// clock: no epoch adjustment is needed, so std::chrono::time_point_cast does it.
+template <typename DestTimePoint, typename SrcTimePoint,
+          typename std::enable_if<std::is_same<typename DestTimePoint::clock,
+                                               typename SrcTimePoint::clock>::value,
+                                  bool>::type = true>
+DestTimePoint time_point_cast(const SrcTimePoint& time)
+{
+    return std::chrono::time_point_cast<typename DestTimePoint::duration>(time);
+}
+
+// This variant is for when the source and dest use different clocks -- see
+// the linked StackOverflow answer, also Howard Hinnant's, for more context.
+template <typename DestTimePoint, typename SrcTimePoint,
+          typename std::enable_if<! std::is_same<typename DestTimePoint::clock,
+                                                 typename SrcTimePoint::clock>::value,
+                                  bool>::type = true>
+DestTimePoint time_point_cast(const SrcTimePoint& time)
+{
+    // The basic idea is that we must adjust the passed time_point by the
+    // difference between the clocks' epochs. But since time_point doesn't
+    // expose its epoch, we fall back on what each of them thinks is now().
+    // Sequential now() calls differ by the cycles spent executing them and
+    // by potential OS interruptions between them, so capture the source
+    // clock both before and after the dest clock and split the difference.
+    // An interruption between only two of those calls still skews the
+    // result, but better is more expensive.
+    // (now() is a static member function, so no 'typename' on these calls.)
+    const auto src_before = SrcTimePoint::clock::now();
+    const auto dest_now   = DestTimePoint::clock::now();
+    const auto src_after  = SrcTimePoint::clock::now();
+    const auto src_diff   = src_after - src_before;
+    const auto src_now    = src_before + src_diff / 2;
+    // Cast explicitly: the sum's duration may be finer than DestTimePoint's.
+    return std::chrono::time_point_cast<typename DestTimePoint::duration>(
+        dest_now + (time - src_now));
+}
+
+} // namespace LL
+
+#endif /* ! defined(LL_CHRONO_H) */
diff --git a/indra/llcommon/linden_common.h b/indra/llcommon/linden_common.h
index e5a913a6a9..a228fd22be 100644
--- a/indra/llcommon/linden_common.h
+++ b/indra/llcommon/linden_common.h
@@ -27,6 +27,14 @@
 #ifndef LL_LINDEN_COMMON_H
 #define LL_LINDEN_COMMON_H
 
+#include "llprofiler.h"
+#if TRACY_ENABLE && !defined(LL_PROFILER_ENABLE_TRACY_OPENGL)  // hooks for memory profiling
+void *tracy_aligned_malloc(size_t size, size_t alignment);
+void  tracy_aligned_free(void *memblock);
+#define _aligned_malloc(X, Y) tracy_aligned_malloc((X), (Y))
+#define _aligned_free(X)      tracy_aligned_free((X))
+#endif
+
 // *NOTE:  Please keep includes here to a minimum!
 //
 // Files included here are included in every library .cpp file and
diff --git a/indra/llcommon/llcommon.cpp b/indra/llcommon/llcommon.cpp
index 96be913d17..5d4a623bf6 100644
--- a/indra/llcommon/llcommon.cpp
+++ b/indra/llcommon/llcommon.cpp
@@ -33,6 +33,47 @@
 #include "lltracethreadrecorder.h"
 #include "llcleanup.h"
 
+#if (TRACY_ENABLE)
+// Override new/delete for tracy memory profiling
+void *operator new(size_t size)
+{
+    auto ptr = (malloc) (size ? size : 1); // malloc(0) may return null; new(0) must not
+    if (!ptr)
+    {
+        throw std::bad_alloc();
+    }
+    TracyAlloc(ptr, size);
+    return ptr;
+}
+
+void operator delete(void *ptr) noexcept
+{
+    if (ptr) { TracyFree(ptr); } // deleting null is legal; no allocation was reported for it
+    (free)(ptr);
+}
+
+// C-style malloc/free can't be so easily overridden, so we define tracy versions and use
+// a pre-processor #define in linden_common.h to redirect to them. The parens around the native
+// function names prevent recursive substitution by the preprocessor.
+//
+// Unaligned mallocs are rare in LL code but hooking them causes problems in 3p lib code (looking at
+// you, Havok), so we'll only capture the aligned version.
+
+void *tracy_aligned_malloc(size_t size, size_t alignment)
+{
+    auto block = ll_aligned_malloc_fallback(size, alignment);
+    if (block) { TracyAlloc(block, size); } // only successful allocations are reported
+    return block;
+}
+
+void tracy_aligned_free(void *memblock)
+{
+    if (memblock) { TracyFree(memblock); } // tracy_aligned_malloc never reports a null block
+    ll_aligned_free_fallback(memblock);
+}
+
+#endif
+
 //static
 BOOL LLCommon::sAprInitialized = FALSE;
 
diff --git a/indra/llcommon/llcond.h b/indra/llcommon/llcond.h
index e31b67d893..c08acb66a1 100644
--- a/indra/llcommon/llcond.h
+++ b/indra/llcommon/llcond.h
@@ -53,6 +53,8 @@ private:
     LLCoros::Mutex mMutex;
     // Use LLCoros::ConditionVariable for the same reason.
     LLCoros::ConditionVariable mCond;
+    using LockType = LLCoros::LockType;
+    using cv_status = LLCoros::cv_status;
 
 public:
     /// LLCond can be explicitly initialized with a specific value for mData if
@@ -65,10 +67,14 @@ public:
     LLCond(const LLCond&) = delete;
     LLCond& operator=(const LLCond&) = delete;
 
-    /// get() returns a const reference to the stored DATA. The only way to
-    /// get a non-const reference -- to modify the stored DATA -- is via
-    /// update_one() or update_all().
-    const value_type& get() const { return mData; }
+    /// get() returns the stored DATA by value -- so to use get(), DATA must
+    /// be copyable. The only way to get a non-const reference -- to modify
+    /// the stored DATA -- is via update_one() or update_all().
+    value_type get()
+    {
+        LockType lk(mMutex);
+        return mData;
+    }
 
     /**
      * Pass update_one() an invocable accepting non-const (DATA&). The
@@ -83,7 +89,7 @@ public:
     void update_one(MODIFY modify)
     {
         { // scope of lock can/should end before notify_one()
-            LLCoros::LockType lk(mMutex);
+            LockType lk(mMutex);
             modify(mData);
         }
         mCond.notify_one();
@@ -102,7 +108,7 @@ public:
     void update_all(MODIFY modify)
     {
         { // scope of lock can/should end before notify_all()
-            LLCoros::LockType lk(mMutex);
+            LockType lk(mMutex);
             modify(mData);
         }
         mCond.notify_all();
@@ -118,7 +124,7 @@ public:
     template <typename Pred>
     void wait(Pred pred)
     {
-        LLCoros::LockType lk(mMutex);
+        LockType lk(mMutex);
         // We must iterate explicitly since the predicate accepted by
         // condition_variable::wait() requires a different signature:
         // condition_variable::wait() calls its predicate with no arguments.
@@ -205,14 +211,14 @@ private:
     template <typename Clock, typename Duration, typename Pred>
     bool wait_until(const std::chrono::time_point<Clock, Duration>& timeout_time, Pred pred)
     {
-        LLCoros::LockType lk(mMutex);
+        LockType lk(mMutex);
         // We advise the caller to pass a predicate accepting (const DATA&).
         // But what if they instead pass a predicate accepting non-const
         // (DATA&)? Such a predicate could modify mData, which would be Bad.
         // Forbid that.
         while (! pred(const_cast<const value_type&>(mData)))
         {
-            if (LLCoros::cv_status::timeout == mCond.wait_until(lk, timeout_time))
+            if (cv_status::timeout == mCond.wait_until(lk, timeout_time))
             {
                 // It's possible that wait_until() timed out AND the predicate
                 // became true more or less simultaneously. Even though
diff --git a/indra/llcommon/llerror.cpp b/indra/llcommon/llerror.cpp
index 8355df9045..f7af181927 100644
--- a/indra/llcommon/llerror.cpp
+++ b/indra/llcommon/llerror.cpp
@@ -109,6 +109,7 @@ namespace {
 		virtual void recordMessage(LLError::ELevel level,
 									const std::string& message) override
 		{
+            LL_PROFILE_ZONE_SCOPED
 			int syslogPriority = LOG_CRIT;
 			switch (level) {
 				case LLError::LEVEL_DEBUG:	syslogPriority = LOG_DEBUG;	break;
@@ -166,6 +167,7 @@ namespace {
         virtual void recordMessage(LLError::ELevel level,
                                     const std::string& message) override
         {
+            LL_PROFILE_ZONE_SCOPED
             if (LLError::getAlwaysFlush())
             {
                 mFile << message << std::endl;
@@ -208,6 +210,7 @@ namespace {
 		virtual void recordMessage(LLError::ELevel level,
 					   const std::string& message) override
 		{
+            LL_PROFILE_ZONE_SCOPED
             static std::string s_ansi_error = createANSI("31"); // red
             static std::string s_ansi_warn  = createANSI("34"); // blue
             static std::string s_ansi_debug = createANSI("35"); // magenta
@@ -220,7 +223,8 @@ namespace {
 			}
             else
             {
-                 fprintf(stderr, "%s\n", message.c_str());
+                LL_PROFILE_ZONE_NAMED("fprintf");
+                fprintf(stderr, "%s\n", message.c_str());
             }
 		}
 	
@@ -229,6 +233,7 @@ namespace {
 
         LL_FORCE_INLINE void writeANSI(const std::string& ansi_code, const std::string& message)
 		{
+            LL_PROFILE_ZONE_SCOPED
             static std::string s_ansi_bold  = createANSI("1");  // bold
             static std::string s_ansi_reset = createANSI("0");  // reset
 			// ANSI color code escape sequence, message, and reset in one fprintf call
@@ -265,6 +270,7 @@ namespace {
 		virtual void recordMessage(LLError::ELevel level,
 								   const std::string& message) override
 		{
+            LL_PROFILE_ZONE_SCOPED
 			mBuffer->addLine(message);
 		}
 	
@@ -291,6 +297,7 @@ namespace {
 		virtual void recordMessage(LLError::ELevel level,
 								   const std::string& message) override
 		{
+            LL_PROFILE_ZONE_SCOPED
 			debugger_print(message);
 		}
 	};
@@ -1178,6 +1185,7 @@ namespace
 
 	void writeToRecorders(const LLError::CallSite& site, const std::string& message)
 	{
+        LL_PROFILE_ZONE_SCOPED
 		LLError::ELevel level = site.mLevel;
 		LLError::SettingsConfigPtr s = LLError::Settings::getInstance()->getSettingsConfig();
 
@@ -1311,6 +1319,7 @@ namespace LLError
 
 	bool Log::shouldLog(CallSite& site)
 	{
+        LL_PROFILE_ZONE_SCOPED
 		LLMutexTrylock lock(getMutex<LOG_MUTEX>(), 5);
 		if (!lock.isLocked())
 		{
@@ -1354,6 +1363,7 @@ namespace LLError
 
 	void Log::flush(const std::ostringstream& out, const CallSite& site)
 	{
+        LL_PROFILE_ZONE_SCOPED
 		LLMutexTrylock lock(getMutex<LOG_MUTEX>(),5);
 		if (!lock.isLocked())
 		{
diff --git a/indra/llcommon/llerror.h b/indra/llcommon/llerror.h
index d439136ca8..d06c0e2132 100644
--- a/indra/llcommon/llerror.h
+++ b/indra/llcommon/llerror.h
@@ -35,7 +35,9 @@
 
 #include "stdtypes.h"
 
+#include "llprofiler.h"
 #include "llpreprocessor.h"
+
 #include <boost/static_assert.hpp>
 
 const int LL_ERR_NOERR = 0;
@@ -348,7 +350,8 @@ typedef LLError::NoClassInfo _LL_CLASS_TO_LOG;
 // if (condition) LL_INFOS() << "True" << LL_ENDL; else LL_INFOS()() << "False" << LL_ENDL;
 
 #define lllog(level, once, ...)                                         \
-	do {                                                                \
+    do {                                                                \
+        LL_PROFILE_ZONE_NAMED("lllog");                                 \
 		const char* tags[] = {"", ##__VA_ARGS__};                       \
 		static LLError::CallSite _site(lllog_site_args_(level, once, tags)); \
 		lllog_test_()
diff --git a/indra/llcommon/llerrorcontrol.h b/indra/llcommon/llerrorcontrol.h
index e87bb7bf35..57f10b7895 100644
--- a/indra/llcommon/llerrorcontrol.h
+++ b/indra/llcommon/llerrorcontrol.h
@@ -190,6 +190,7 @@ namespace LLError
         {}
         void recordMessage(LLError::ELevel level, const std::string& message) override
         {
+            LL_PROFILE_ZONE_SCOPED
             mCallable(level, message);
         }
     private:
diff --git a/indra/llcommon/lleventfilter.h b/indra/llcommon/lleventfilter.h
index 48c2570732..7613850fb2 100644
--- a/indra/llcommon/lleventfilter.h
+++ b/indra/llcommon/lleventfilter.h
@@ -429,6 +429,8 @@ public:
     // path, then stores it to mTarget.
     virtual bool post(const LLSD& event)
     {
+        LL_PROFILE_ZONE_SCOPED
+
         // Extract the element specified by 'mPath' from 'event'. To perform a
         // generic type-appropriate store through mTarget, construct an
         // LLSDParam<T> and store that, thus engaging LLSDParam's custom
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index dfc63d08a2..9bd93d7240 100644
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -38,7 +38,10 @@
 #define LL_FAST_TIMER_ON 1
 #define LL_FASTTIMER_USE_RDTSC 1
 
+// NOTE: Also see llprofiler.h
+#if !defined(LL_PROFILER_CONFIGURATION)
 #define LL_RECORD_BLOCK_TIME(timer_stat) const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(timer_stat)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
+#endif // LL_PROFILER_CONFIGURATION
 
 namespace LLTrace
 {
diff --git a/indra/llcommon/llframetimer.cpp b/indra/llcommon/llframetimer.cpp
index 1e9920746b..c54029e8b4 100644
--- a/indra/llcommon/llframetimer.cpp
+++ b/indra/llcommon/llframetimer.cpp
@@ -29,6 +29,11 @@
 
 #include "llframetimer.h"
 
+// We don't bother building a stand alone lib; we just need to include the one source file for Tracy support
+#if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER
+	#include "TracyClient.cpp"
+#endif // LL_PROFILER_CONFIGURATION
+
 // Static members
 //LLTimer	LLFrameTimer::sInternalTimer;
 U64 LLFrameTimer::sStartTotalTime = totalTime();
diff --git a/indra/llcommon/llinstancetracker.h b/indra/llcommon/llinstancetracker.h
index 402333cca7..02535a59e7 100644
--- a/indra/llcommon/llinstancetracker.h
+++ b/indra/llcommon/llinstancetracker.h
@@ -83,13 +83,34 @@ class LLInstanceTracker
     typedef llthread::LockStatic<StaticData> LockStatic;
 
 public:
+    using ptr_t  = std::shared_ptr<T>;
+    using weak_t = std::weak_ptr<T>;
+
+    /**
+     * Storing a dumb T* somewhere external is a bad idea, since
+     * LLInstanceTracker subclasses are explicitly destroyed rather than
+     * managed by smart pointers. It's legal to declare stack instances of an
+     * LLInstanceTracker subclass. But it's reasonable to store a
+     * std::weak_ptr<T>, which will become invalid when the T instance is
+     * destroyed.
+     */
+    weak_t getWeak()
+    {
+        return mSelf;
+    }
+
+    static S32 instanceCount() // entries in the instance map, read under LockStatic
+    {
+        return static_cast<S32>(LockStatic()->mMap.size());
+    }
+    
     // snapshot of std::pair<const KEY, std::shared_ptr<T>> pairs
     class snapshot
     {
         // It's very important that what we store in this snapshot are
         // weak_ptrs, NOT shared_ptrs. That's how we discover whether any
         // instance has been deleted during the lifespan of a snapshot.
-        typedef std::vector<std::pair<const KEY, std::weak_ptr<T>>> VectorType;
+        typedef std::vector<std::pair<const KEY, weak_t>> VectorType;
         // Dereferencing our iterator produces a std::shared_ptr for each
         // instance that still exists. Since we store weak_ptrs, that involves
         // two chained transformations:
@@ -98,7 +119,7 @@ public:
         // It is very important that we filter lazily, that is, during
         // traversal. Any one of our stored weak_ptrs might expire during
         // traversal.
-        typedef std::pair<const KEY, std::shared_ptr<T>> strong_pair;
+        typedef std::pair<const KEY, ptr_t> strong_pair;
         // Note for future reference: nat has not yet had any luck (up to
         // Boost 1.67) trying to use boost::transform_iterator with a hand-
         // coded functor, only with actual functions. In my experience, an
@@ -202,17 +223,12 @@ public:
         iterator end()   { return iterator(snapshot::end(),   key_getter); }
     };
 
-    static T* getInstance(const KEY& k)
+    static ptr_t getInstance(const KEY& k)
     {
         LockStatic lock;
         const InstanceMap& map(lock->mMap);
         typename InstanceMap::const_iterator found = map.find(k);
-        return (found == map.end()) ? NULL : found->second.get();
-    }
-
-    static S32 instanceCount() 
-    { 
-        return LockStatic()->mMap.size(); 
+        return (found == map.end()) ? ptr_t() : found->second;
     }
 
 protected:
@@ -222,7 +238,9 @@ protected:
         // shared_ptr, so give it a no-op deleter. We store shared_ptrs in our
         // InstanceMap specifically so snapshot can store weak_ptrs so we can
         // detect deletions during traversals.
-        std::shared_ptr<T> ptr(static_cast<T*>(this), [](T*){});
+        ptr_t ptr(static_cast<T*>(this), [](T*){});
+        // save corresponding weak_ptr for future reference
+        mSelf = ptr;
         LockStatic lock;
         add_(lock, key, ptr);
     }
@@ -257,7 +275,7 @@ private:
     static std::string report(const char* key) { return report(std::string(key)); }
 
     // caller must instantiate LockStatic
-    void add_(LockStatic& lock, const KEY& key, const std::shared_ptr<T>& ptr) 
+    void add_(LockStatic& lock, const KEY& key, const ptr_t& ptr) 
     { 
         mInstanceKey = key; 
         InstanceMap& map = lock->mMap;
@@ -281,7 +299,7 @@ private:
             break;
         }
     }
-    std::shared_ptr<T> remove_(LockStatic& lock)
+    ptr_t remove_(LockStatic& lock)
     {
         InstanceMap& map = lock->mMap;
         typename InstanceMap::iterator iter = map.find(mInstanceKey);
@@ -295,6 +313,9 @@ private:
     }
 
 private:
+    // Storing a weak_ptr to self is a bit like deriving from
+    // std::enable_shared_from_this(), except more explicit.
+    weak_t mSelf;
     KEY mInstanceKey;
 };
 
@@ -326,6 +347,9 @@ class LLInstanceTracker<T, void, KEY_COLLISION_BEHAVIOR>
     typedef llthread::LockStatic<StaticData> LockStatic;
 
 public:
+    using ptr_t  = std::shared_ptr<T>;
+    using weak_t = std::weak_ptr<T>;
+
     /**
      * Storing a dumb T* somewhere external is a bad idea, since
      * LLInstanceTracker subclasses are explicitly destroyed rather than
@@ -334,12 +358,15 @@ public:
      * std::weak_ptr<T>, which will become invalid when the T instance is
      * destroyed.
      */
-    std::weak_ptr<T> getWeak()
+    weak_t getWeak()
     {
         return mSelf;
     }
     
-    static S32 instanceCount() { return LockStatic()->mSet.size(); }
+    static S32 instanceCount()
+    {
+        return LockStatic()->mSet.size();
+    }
 
     // snapshot of std::shared_ptr<T> pointers
     class snapshot
@@ -347,7 +374,7 @@ public:
         // It's very important that what we store in this snapshot are
         // weak_ptrs, NOT shared_ptrs. That's how we discover whether any
         // instance has been deleted during the lifespan of a snapshot.
-        typedef std::vector<std::weak_ptr<T>> VectorType;
+        typedef std::vector<weak_t> VectorType;
         // Dereferencing our iterator produces a std::shared_ptr for each
         // instance that still exists. Since we store weak_ptrs, that involves
         // two chained transformations:
@@ -453,7 +480,7 @@ protected:
 private:
     // Storing a weak_ptr to self is a bit like deriving from
     // std::enable_shared_from_this(), except more explicit.
-    std::weak_ptr<T> mSelf;
+    weak_t mSelf;
 };
 
 #endif
diff --git a/indra/llcommon/llleaplistener.cpp b/indra/llcommon/llleaplistener.cpp
index 3e6ce9092c..11bfec1b31 100644
--- a/indra/llcommon/llleaplistener.cpp
+++ b/indra/llcommon/llleaplistener.cpp
@@ -220,7 +220,7 @@ void LLLeapListener::getAPI(const LLSD& request) const
 {
     Response reply(LLSD(), request);
 
-    LLEventAPI* found = LLEventAPI::getInstance(request["api"]);
+    auto found = LLEventAPI::getInstance(request["api"]);
     if (found)
     {
         reply["name"] = found->getName();
diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp
index ea84e4c1ea..849867586a 100644
--- a/indra/llcommon/llmemory.cpp
+++ b/indra/llcommon/llmemory.cpp
@@ -82,6 +82,7 @@ void LLMemory::initMaxHeapSizeGB(F32Gigabytes max_heap_size)
 //static 
 void LLMemory::updateMemoryInfo() 
 {
+	LL_PROFILE_ZONE_SCOPED
 #if LL_WINDOWS
 	PROCESS_MEMORY_COUNTERS counters;
 
@@ -145,6 +146,7 @@ void* LLMemory::tryToAlloc(void* address, U32 size)
 //static 
 void LLMemory::logMemoryInfo(BOOL update)
 {
+	LL_PROFILE_ZONE_SCOPED
 	if(update)
 	{
 		updateMemoryInfo() ;
diff --git a/indra/llcommon/llmutex.cpp b/indra/llcommon/llmutex.cpp
index 4d73c04d07..a49002b5dc 100644
--- a/indra/llcommon/llmutex.cpp
+++ b/indra/llcommon/llmutex.cpp
@@ -44,6 +44,7 @@ LLMutex::~LLMutex()
 
 void LLMutex::lock()
 {
+    LL_PROFILE_ZONE_SCOPED
 	if(isSelfLocked())
 	{ //redundant lock
 		mCount++;
@@ -65,6 +66,7 @@ void LLMutex::lock()
 
 void LLMutex::unlock()
 {
+    LL_PROFILE_ZONE_SCOPED
 	if (mCount > 0)
 	{ //not the root unlock
 		mCount--;
@@ -85,6 +87,7 @@ void LLMutex::unlock()
 
 bool LLMutex::isLocked()
 {
+    LL_PROFILE_ZONE_SCOPED
 	if (!mMutex.try_lock())
 	{
 		return true;
@@ -108,6 +111,7 @@ LLThread::id_t LLMutex::lockingThread() const
 
 bool LLMutex::trylock()
 {
+    LL_PROFILE_ZONE_SCOPED
 	if(isSelfLocked())
 	{ //redundant lock
 		mCount++;
@@ -146,17 +150,20 @@ LLCondition::~LLCondition()
 
 void LLCondition::wait()
 {
+    LL_PROFILE_ZONE_SCOPED
 	std::unique_lock< std::mutex > lock(mMutex);
 	mCond.wait(lock);
 }
 
 void LLCondition::signal()
 {
+    LL_PROFILE_ZONE_SCOPED
 	mCond.notify_one();
 }
 
 void LLCondition::broadcast()
 {
+    LL_PROFILE_ZONE_SCOPED
 	mCond.notify_all();
 }
 
@@ -166,6 +173,7 @@ LLMutexTrylock::LLMutexTrylock(LLMutex* mutex)
     : mMutex(mutex),
     mLocked(false)
 {
+    LL_PROFILE_ZONE_SCOPED
     if (mMutex)
         mLocked = mMutex->trylock();
 }
@@ -174,6 +182,7 @@ LLMutexTrylock::LLMutexTrylock(LLMutex* mutex, U32 aTries, U32 delay_ms)
     : mMutex(mutex),
     mLocked(false)
 {
+    LL_PROFILE_ZONE_SCOPED
     if (!mMutex)
         return;
 
@@ -188,6 +197,7 @@ LLMutexTrylock::LLMutexTrylock(LLMutex* mutex, U32 aTries, U32 delay_ms)
 
 LLMutexTrylock::~LLMutexTrylock()
 {
+    LL_PROFILE_ZONE_SCOPED
     if (mMutex && mLocked)
         mMutex->unlock();
 }
@@ -199,6 +209,7 @@ LLMutexTrylock::~LLMutexTrylock()
 //
 LLScopedLock::LLScopedLock(std::mutex* mutex) : mMutex(mutex)
 {
+    LL_PROFILE_ZONE_SCOPED
 	if(mutex)
 	{
 		mutex->lock();
@@ -217,6 +228,7 @@ LLScopedLock::~LLScopedLock()
 
 void LLScopedLock::unlock()
 {
+    LL_PROFILE_ZONE_SCOPED
 	if(mLocked)
 	{
 		mMutex->unlock();
diff --git a/indra/llcommon/llprofiler.h b/indra/llcommon/llprofiler.h
new file mode 100644
index 0000000000..49510df913
--- /dev/null
+++ b/indra/llcommon/llprofiler.h
@@ -0,0 +1,135 @@
+/**
+ * @file llprofiler.h
+ * @brief Wrapper for Tracy and/or other profilers
+ *
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2021, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifndef LL_PROFILER_H
+#define LL_PROFILER_H
+
+// Profiler back ends. The build selects one by defining
+// LL_PROFILER_CONFIGURATION; cmake/Tracy.cmake passes
+// -DLL_PROFILER_CONFIGURATION=2 when USE_TRACY is on.
+#define LL_PROFILER_CONFIG_NONE             0  // No profiling
+#define LL_PROFILER_CONFIG_FAST_TIMER       1  // Profiling on: Only Fast Timers
+#define LL_PROFILER_CONFIG_TRACY            2  // Profiling on: Only Tracy
+#define LL_PROFILER_CONFIG_TRACY_FAST_TIMER 3  // Profiling on: Fast Timers + Tracy
+
+// Nothing selected by the build: default to Fast Timers only.
+#ifndef LL_PROFILER_CONFIGURATION
+#define LL_PROFILER_CONFIGURATION           LL_PROFILER_CONFIG_FAST_TIMER
+#endif
+
+// Every configuration, NONE included, defines the same macros so that call
+// sites compile unchanged whichever one is selected:
+//   LL_PROFILER_FRAME_END, LL_PROFILER_SET_THREAD_NAME(name),
+//   LL_RECORD_BLOCK_TIME(name), LL_PROFILE_ZONE_SCOPED,
+//   LL_PROFILE_ZONE_NAMED(name), LL_PROFILE_ZONE_NAMED_COLOR(name,color),
+//   LL_PROFILE_ZONE_NUM(val), LL_PROFILE_ZONE_TEXT(text,size),
+//   LL_PROFILE_ZONE_ERR(name), LL_PROFILE_ZONE_INFO(name),
+//   LL_PROFILE_ZONE_WARN(name)
+// Call sites write LL_PROFILE_ZONE_SCOPED and LL_PROFILE_ZONE_NUM() without a
+// trailing semicolon, so the no-op forms expand to nothing or to complete
+// statements.
+
+#if defined(LL_PROFILER_CONFIGURATION) && (LL_PROFILER_CONFIGURATION > LL_PROFILER_CONFIG_NONE)
+    #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER
+        #define TRACY_ENABLE         1
+// Normally these would be enabled but we want to be able to build any viewer with Tracy enabled and run the Tracy server on another machine
+// They must be undefined in order to work across multiple machines
+//      #define TRACY_NO_BROADCAST   1
+//      #define TRACY_ONLY_LOCALHOST 1
+        #define TRACY_ONLY_IPV4      1
+        #include "Tracy.hpp"
+
+        // Mutually exclusive with detailed memory tracing
+        #define LL_PROFILER_ENABLE_TRACY_OPENGL 0
+    #endif
+
+    #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY
+        #define LL_PROFILER_FRAME_END                   FrameMark
+        #define LL_PROFILER_SET_THREAD_NAME( name )     tracy::SetThreadName( name )
+        #define LL_RECORD_BLOCK_TIME(name)              ZoneScoped // Want descriptive names; was: ZoneNamedN( ___tracy_scoped_zone, #name, true );
+        #define LL_PROFILE_ZONE_NAMED(name)             ZoneNamedN( ___tracy_scoped_zone, name, true );
+        // NOTE(review): "___tracy_scopped_zone" below differs from the "___tracy_scoped_zone" used above.
+        // The spelling is kept as-is because renaming it changes which zone variable gets declared -- confirm intent.
+        #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB
+        #define LL_PROFILE_ZONE_SCOPED                  ZoneScoped
+
+        #define LL_PROFILE_ZONE_NUM( val )              ZoneValue( val )
+        #define LL_PROFILE_ZONE_TEXT( text, size )      ZoneText( text, size )
+
+        #define LL_PROFILE_ZONE_ERR(name)               LL_PROFILE_ZONE_NAMED_COLOR( name, 0xFF0000 )  // RGB red
+        #define LL_PROFILE_ZONE_INFO(name)              LL_PROFILE_ZONE_NAMED_COLOR( name, 0x00FFFF )  // RGB cyan
+        #define LL_PROFILE_ZONE_WARN(name)              LL_PROFILE_ZONE_NAMED_COLOR( name, 0xFFFF00 )  // RGB yellow
+    #endif
+    #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_FAST_TIMER
+        #define LL_PROFILER_FRAME_END
+        #define LL_PROFILER_SET_THREAD_NAME( name )      (void)(name)
+        #define LL_RECORD_BLOCK_TIME(name)                                                                  const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(name)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
+        #define LL_PROFILE_ZONE_NAMED(name)             // LL_PROFILE_ZONE_NAMED is a no-op when Tracy is disabled
+        #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) // LL_PROFILE_ZONE_NAMED_COLOR is a no-op when Tracy is disabled
+        #define LL_PROFILE_ZONE_SCOPED                  // LL_PROFILE_ZONE_SCOPED is a no-op when Tracy is disabled
+        #define LL_PROFILE_ZONE_COLOR(name,color)       // Older spelling of LL_PROFILE_ZONE_NAMED_COLOR, kept as a no-op
+
+        #define LL_PROFILE_ZONE_NUM( val )              (void)( val );                  // Not supported
+        #define LL_PROFILE_ZONE_TEXT( text, size )      (void)( text ); (void)( size ); // Not supported
+
+        #define LL_PROFILE_ZONE_ERR(name)               (void)(name); // Not supported
+        #define LL_PROFILE_ZONE_INFO(name)              (void)(name); // Not supported
+        #define LL_PROFILE_ZONE_WARN(name)              (void)(name); // Not supported
+    #endif
+    #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER
+        #define LL_PROFILER_FRAME_END                   FrameMark
+        #define LL_PROFILER_SET_THREAD_NAME( name )     tracy::SetThreadName( name )
+        #define LL_RECORD_BLOCK_TIME(name)              ZoneScoped                                          const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(name)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
+        #define LL_PROFILE_ZONE_NAMED(name)             ZoneNamedN( ___tracy_scoped_zone, name, true );
+        // Same zone variable spelling as in the Tracy-only configuration; see the NOTE(review) there.
+        #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB
+        #define LL_PROFILE_ZONE_SCOPED                  ZoneScoped
+
+        #define LL_PROFILE_ZONE_NUM( val )              ZoneValue( val )
+        #define LL_PROFILE_ZONE_TEXT( text, size )      ZoneText( text, size )
+
+        #define LL_PROFILE_ZONE_ERR(name)               LL_PROFILE_ZONE_NAMED_COLOR( name, 0xFF0000 )  // RGB red
+        #define LL_PROFILE_ZONE_INFO(name)              LL_PROFILE_ZONE_NAMED_COLOR( name, 0x00FFFF )  // RGB cyan
+        #define LL_PROFILE_ZONE_WARN(name)              LL_PROFILE_ZONE_NAMED_COLOR( name, 0xFFFF00 )  // RGB yellow
+    #endif
+#else
+    // LL_PROFILER_CONFIG_NONE: macros expand to nothing or to (void) casts of their arguments.
+    #define LL_PROFILER_FRAME_END
+    #define LL_PROFILER_SET_THREAD_NAME( name ) (void)(name)
+    #define LL_RECORD_BLOCK_TIME(name)
+    #define LL_PROFILE_ZONE_NAMED(name)
+    #define LL_PROFILE_ZONE_NAMED_COLOR(name,color)
+    #define LL_PROFILE_ZONE_SCOPED
+
+    #define LL_PROFILE_ZONE_NUM( val )              (void)( val );
+    #define LL_PROFILE_ZONE_TEXT( text, size )      (void)( text ); (void)( size );
+
+    #define LL_PROFILE_ZONE_ERR(name)               (void)(name);
+    #define LL_PROFILE_ZONE_INFO(name)              (void)(name);
+    #define LL_PROFILE_ZONE_WARN(name)              (void)(name);
+#endif // LL_PROFILER_CONFIGURATION > LL_PROFILER_CONFIG_NONE
+
+#endif // LL_PROFILER_H
diff --git a/indra/llcommon/llsdutil.cpp b/indra/llcommon/llsdutil.cpp
index eb3a96b133..c2fe15e9b7 100644
--- a/indra/llcommon/llsdutil.cpp
+++ b/indra/llcommon/llsdutil.cpp
@@ -214,6 +214,8 @@ BOOL compare_llsd_with_template(
 	const LLSD& template_llsd,
 	LLSD& resultant_llsd)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (
 		llsd_to_test.isUndefined() &&
 		template_llsd.isDefined() )
@@ -335,6 +337,8 @@ bool filter_llsd_with_template(
 	const LLSD & template_llsd,
 	LLSD & resultant_llsd)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (llsd_to_test.isUndefined() && template_llsd.isDefined())
 	{
 		resultant_llsd = template_llsd;
@@ -529,6 +533,8 @@ class TypeLookup
 public:
     TypeLookup()
     {
+        LL_PROFILE_ZONE_SCOPED
+
         for (const Data *di(boost::begin(typedata)), *dend(boost::end(typedata)); di != dend; ++di)
         {
             mMap[di->type] = di->name;
@@ -537,6 +543,8 @@ public:
 
     std::string lookup(LLSD::Type type) const
     {
+        LL_PROFILE_ZONE_SCOPED
+
         MapType::const_iterator found = mMap.find(type);
         if (found != mMap.end())
         {
@@ -587,6 +595,8 @@ static std::string match_types(LLSD::Type expect, // prototype.type()
                                LLSD::Type actual,        // type we're checking
                                const std::string& pfx)   // as for llsd_matches
 {
+    LL_PROFILE_ZONE_SCOPED
+
     // Trivial case: if the actual type is exactly what we expect, we're good.
     if (actual == expect)
         return "";
@@ -624,6 +634,8 @@ static std::string match_types(LLSD::Type expect, // prototype.type()
 // see docstring in .h file
 std::string llsd_matches(const LLSD& prototype, const LLSD& data, const std::string& pfx)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     // An undefined prototype means that any data is valid.
     // An undefined slot in an array or map prototype means that any data
     // may fill that slot.
@@ -756,6 +768,8 @@ std::string llsd_matches(const LLSD& prototype, const LLSD& data, const std::str
 
 bool llsd_equals(const LLSD& lhs, const LLSD& rhs, int bits)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     // We're comparing strict equality of LLSD representation rather than
     // performing any conversions. So if the types aren't equal, the LLSD
     // values aren't equal.
@@ -864,6 +878,8 @@ namespace llsd
 
 LLSD& drill(LLSD& blob, const LLSD& rawPath)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     // Treat rawPath uniformly as an array. If it's not already an array,
     // store it as the only entry in one. (But let's say Undefined means an
     // empty array.)
@@ -889,6 +905,8 @@ LLSD& drill(LLSD& blob, const LLSD& rawPath)
     // path entry that's bad.
     for (LLSD::Integer i = 0; i < path.size(); ++i)
     {
+        LL_PROFILE_ZONE_NUM( i )
+
         const LLSD& key{path[i]};
         if (key.isString())
         {
@@ -917,6 +935,8 @@ LLSD& drill(LLSD& blob, const LLSD& rawPath)
 
 LLSD drill(const LLSD& blob, const LLSD& path)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     // non-const drill() does exactly what we want. Temporarily cast away
     // const-ness and use that.
     return drill(const_cast<LLSD&>(blob), path);
@@ -929,6 +949,8 @@ LLSD drill(const LLSD& blob, const LLSD& path)
 // filter may be include to exclude/include keys in a map. 
 LLSD llsd_clone(LLSD value, LLSD filter)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     LLSD clone;
     bool has_filter(filter.isMap());
 
diff --git a/indra/llcommon/llsys.cpp b/indra/llcommon/llsys.cpp
index 4e61fb8a58..6d5d043e8d 100644
--- a/indra/llcommon/llsys.cpp
+++ b/indra/llcommon/llsys.cpp
@@ -861,6 +861,7 @@ LLSD LLMemoryInfo::getStatsMap() const
 
 LLMemoryInfo& LLMemoryInfo::refresh()
 {
+	LL_PROFILE_ZONE_SCOPED
 	mStatsMap = loadStatsMap();
 
 	LL_DEBUGS("LLMemoryInfo") << "Populated mStatsMap:\n";
diff --git a/indra/llcommon/llthread.cpp b/indra/llcommon/llthread.cpp
index 6d531d842d..11f5a015f1 100644
--- a/indra/llcommon/llthread.cpp
+++ b/indra/llcommon/llthread.cpp
@@ -135,6 +135,8 @@ void LLThread::threadRun()
     set_thread_name(-1, mName.c_str());
 #endif
 
+    LL_PROFILER_SET_THREAD_NAME( mName.c_str() );
+
     // this is the first point at which we're actually running in the new thread
     mID = currentID();
 
@@ -331,6 +333,7 @@ bool LLThread::runCondition(void)
 // Stop thread execution if requested until unpaused.
 void LLThread::checkPause()
 {
+    LL_PROFILE_ZONE_SCOPED
     mDataLock->lock();
 
     // This is in a while loop because the pthread API allows for spurious wakeups.
@@ -362,17 +365,20 @@ void LLThread::setQuitting()
 // static
 LLThread::id_t LLThread::currentID()
 {
+    LL_PROFILE_ZONE_SCOPED
     return std::this_thread::get_id();
 }
 
 // static
 void LLThread::yield()
 {
+    LL_PROFILE_ZONE_SCOPED
     std::this_thread::yield();
 }
 
 void LLThread::wake()
 {
+    LL_PROFILE_ZONE_SCOPED
     mDataLock->lock();
     if(!shouldSleep())
     {
@@ -383,6 +389,7 @@ void LLThread::wake()
 
 void LLThread::wakeLocked()
 {
+    LL_PROFILE_ZONE_SCOPED
     if(!shouldSleep())
     {
         mRunCondition->signal();
@@ -391,11 +398,13 @@ void LLThread::wakeLocked()
 
 void LLThread::lockData()
 {
+    LL_PROFILE_ZONE_SCOPED
     mDataLock->lock();
 }
 
 void LLThread::unlockData()
 {
+    LL_PROFILE_ZONE_SCOPED
     mDataLock->unlock();
 }
 
diff --git a/indra/llcommon/llthreadsafequeue.h b/indra/llcommon/llthreadsafequeue.h
index 26e0d71d31..719edcd579 100644
--- a/indra/llcommon/llthreadsafequeue.h
+++ b/indra/llcommon/llthreadsafequeue.h
@@ -1,6 +1,6 @@
 /** 
  * @file llthreadsafequeue.h
- * @brief Base classes for thread, mutex and condition handling.
+ * @brief Queue protected with mutexes for cross-thread use
  *
  * $LicenseInfo:firstyear=2004&license=viewerlgpl$
  * Second Life Viewer Source Code
@@ -27,16 +27,19 @@
 #ifndef LL_LLTHREADSAFEQUEUE_H
 #define LL_LLTHREADSAFEQUEUE_H
 
-#include "llexception.h"
-#include <deque>
-#include <string>
-#include <chrono>
-#include "mutex.h"
 #include "llcoros.h"
 #include LLCOROS_MUTEX_HEADER
 #include <boost/fiber/timed_mutex.hpp>
 #include LLCOROS_CONDVAR_HEADER
+#include "llexception.h"
+#include "mutex.h"
+#include <chrono>
+#include <queue>
+#include <string>
 
+/*****************************************************************************
+*   LLThreadSafeQueue
+*****************************************************************************/
 //
 // A general queue exception.
 //
@@ -66,70 +69,108 @@ public:
 	}
 };
 
-//
-// Implements a thread safe FIFO.
-//
-template<typename ElementT>
+/**
+ * Implements a thread safe FIFO.
+ */
+// Let the default std::queue default to underlying std::deque. Override if
+// desired.
+template<typename ElementT, typename QueueT=std::queue<ElementT>>
 class LLThreadSafeQueue
 {
 public:
 	typedef ElementT value_type;
-	
-	// If the pool is set to NULL one will be allocated and managed by this
-	// queue.
+
+	// Limiting the number of pending items prevents unbounded growth of the
+	// underlying queue.
 	LLThreadSafeQueue(U32 capacity = 1024);
-	
-	// Add an element to the front of queue (will block if the queue has
+	virtual ~LLThreadSafeQueue() {}
+
+	// Add an element to the queue (will block if the queue has
 	// reached capacity).
 	//
 	// This call will raise an interrupt error if the queue is closed while
 	// the caller is blocked.
-	void pushFront(ElementT const & element);
-	
-	// Try to add an element to the front of queue without blocking. Returns
+	template <typename T>
+	void push(T&& element);
+	// legacy name
+	void pushFront(ElementT const & element) { return push(element); }
+
+	// Try to add an element to the queue without blocking. Returns
 	// true only if the element was actually added.
-	bool tryPushFront(ElementT const & element);
+	template <typename T>
+	bool tryPush(T&& element);
+	// legacy name
+	bool tryPushFront(ElementT const & element) { return tryPush(element); }
 
-	// Try to add an element to the front of queue, blocking if full but with
-	// timeout. Returns true if the element was added.
+	// Try to add an element to the queue, blocking if full but with timeout
+	// after specified duration. Returns true if the element was added.
 	// There are potentially two different timeouts involved: how long to try
 	// to lock the mutex, versus how long to wait for the queue to stop being
 	// full. Careful settings for each timeout might be orders of magnitude
 	// apart. However, this method conflates them.
+	template <typename Rep, typename Period, typename T>
+	bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
+					T&& element);
+	// legacy name
 	template <typename Rep, typename Period>
 	bool tryPushFrontFor(const std::chrono::duration<Rep, Period>& timeout,
-						 ElementT const & element);
+						 ElementT const & element) { return tryPushFor(timeout, element); }
 
-	// Pop the element at the end of the queue (will block if the queue is
+	// Try to add an element to the queue, blocking if full but with
+	// timeout at specified time_point. Returns true if the element was added.
+	template <typename Clock, typename Duration, typename T>
+	bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
+					  T&& element);
+	// no legacy name because this is a newer method
+
+	// Pop the element at the head of the queue (will block if the queue is
 	// empty).
 	//
 	// This call will raise an interrupt error if the queue is closed while
 	// the caller is blocked.
-	ElementT popBack(void);
-	
-	// Pop an element from the end of the queue if there is one available.
+	ElementT pop(void);
+	// legacy name
+	ElementT popBack(void) { return pop(); }
+
+	// Pop an element from the head of the queue if there is one available.
 	// Returns true only if an element was popped.
-	bool tryPopBack(ElementT & element);
-	
+	bool tryPop(ElementT & element);
+	// legacy name
+	bool tryPopBack(ElementT & element) { return tryPop(element); }
+
+	// Pop the element at the head of the queue, blocking if empty, with
+	// timeout after specified duration. Returns true if an element was popped.
+	template <typename Rep, typename Period>
+	bool tryPopFor(const std::chrono::duration<Rep, Period>& timeout, ElementT& element);
+	// no legacy name because this is a newer method
+
+	// Pop the element at the head of the queue, blocking if empty, with
+	// timeout at specified time_point. Returns true if an element was popped.
+	template <typename Clock, typename Duration>
+	bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
+					 ElementT& element);
+	// no legacy name because this is a newer method
+
 	// Returns the size of the queue.
 	size_t size();
 
 	// closes the queue:
-	// - every subsequent pushFront() call will throw LLThreadSafeQueueInterrupt
-	// - every subsequent tryPushFront() call will return false
-	// - popBack() calls will return normally until the queue is drained, then
-	//   every subsequent popBack() will throw LLThreadSafeQueueInterrupt
-	// - tryPopBack() calls will return normally until the queue is drained,
-	//   then every subsequent tryPopBack() call will return false
+	// - every subsequent push() call will throw LLThreadSafeQueueInterrupt
+	// - every subsequent tryPush() call will return false
+	// - pop() calls will return normally until the queue is drained, then
+	//   every subsequent pop() will throw LLThreadSafeQueueInterrupt
+	// - tryPop() calls will return normally until the queue is drained,
+	//   then every subsequent tryPop() call will return false
 	void close();
 
-	// detect closed state
+	// producer end: are we prevented from pushing any additional items?
 	bool isClosed();
-	// inverse of isClosed()
-	explicit operator bool();
+	// consumer end: are we done, is the queue entirely drained?
+	bool done();
 
-private:
-	std::deque< ElementT > mStorage;
+protected:
+	typedef QueueT queue_type;
+	QueueT mStorage;
 	U32 mCapacity;
 	bool mClosed;
 
@@ -137,37 +178,152 @@ private:
 	typedef std::unique_lock<decltype(mLock)> lock_t;
 	boost::fibers::condition_variable_any mCapacityCond;
 	boost::fibers::condition_variable_any mEmptyCond;
-};
 
-// LLThreadSafeQueue
-//-----------------------------------------------------------------------------
+	enum pop_result { EMPTY, DONE, WAITING, POPPED };
+	// implementation logic, suitable for passing to tryLockUntil()
+	template <typename Clock, typename Duration>
+	pop_result tryPopUntil_(lock_t& lock,
+							const std::chrono::time_point<Clock, Duration>& until,
+							ElementT& element);
+	// if we're able to lock immediately, do so and run the passed callable,
+	// which must accept lock_t& and return bool
+	template <typename CALLABLE>
+	bool tryLock(CALLABLE&& callable);
+	// if we're able to lock before the passed time_point, do so and run the
+	// passed callable, which must accept lock_t& and return bool
+	template <typename Clock, typename Duration, typename CALLABLE>
+	bool tryLockUntil(const std::chrono::time_point<Clock, Duration>& until,
+					  CALLABLE&& callable);
+	// while lock is locked, really push the passed element, if we can
+	template <typename T>
+	bool push_(lock_t& lock, T&& element);
+	// while lock is locked, really pop the head element, if we can
+	pop_result pop_(lock_t& lock, ElementT& element);
+	// Is the current head element ready to pop? We say yes; subclass can
+	// override as needed.
+	virtual bool canPop(const ElementT& head) const { return true; }
+};
 
-template<typename ElementT>
-LLThreadSafeQueue<ElementT>::LLThreadSafeQueue(U32 capacity) :
+/*****************************************************************************
+*   PriorityQueueAdapter
+*****************************************************************************/
+namespace LL
+{
+    /**
+     * Adapter giving std::priority_queue the std::queue-style interface
+     * that LLThreadSafeQueue expects of its QueueT: the next element to
+     * pop() is exposed as front(), forwarding to priority_queue::top().
+     */
+    template <typename T, typename Container=std::vector<T>,
+              typename Compare=std::less<typename Container::value_type>>
+    class PriorityQueueAdapter
+    {
+    public:
+        // re-export the wrapped priority_queue's member types
+        using queue_type      = std::priority_queue<T, Container, Compare>;
+        using container_type  = typename queue_type::container_type;
+        using value_compare   = typename queue_type::value_compare;
+        using value_type      = typename queue_type::value_type;
+        using size_type       = typename queue_type::size_type;
+        using reference       = typename queue_type::reference;
+        using const_reference = typename queue_type::const_reference;
+
+        // priority_queue offers only a const top(), so front() is
+        // const-only here (std::queue has a non-const overload as well).
+        const_reference front() const { return mQ.top(); }
+        // There is deliberately no back(): priority_queue has no such
+        // notion, and LLThreadSafeQueue never asks for it.
+
+        // Everything below is a one-line pass-through to the wrapped
+        // priority_queue.
+        size_type size() const             { return mQ.size(); }
+        bool empty() const                 { return mQ.empty(); }
+        void pop()                         { mQ.pop(); }
+        void push(value_type&& value)      { mQ.push(std::move(value)); }
+        void push(const value_type& value) { mQ.push(value); }
+        template <typename... Args>
+        void emplace(Args&&... args)       { mQ.emplace(std::forward<Args>(args)...); }
+
+    private:
+        queue_type mQ;
+    };
+} // namespace LL
+
+
+/*****************************************************************************
+*   LLThreadSafeQueue implementation
+*****************************************************************************/
+template<typename ElementT, typename QueueT>
+LLThreadSafeQueue<ElementT, QueueT>::LLThreadSafeQueue(U32 capacity) :
     mCapacity(capacity),
     mClosed(false)
 {
 }
 
 
-template<typename ElementT>
-void LLThreadSafeQueue<ElementT>::pushFront(ElementT const & element)
+// Make a single non-blocking attempt on mLock. Without the lock, return
+// false; with it, return whatever callable(lock_t&) returns.
+template <typename ElementT, typename QueueT>
+template <typename CALLABLE>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryLock(CALLABLE&& callable)
+{
+    lock_t guard(mLock, std::try_to_lock);
+    if (guard.owns_lock())
+        return std::forward<CALLABLE>(callable)(guard);
+
+    return false;
+}
+
+
+// Try to take mLock, giving up once 'until' has passed. Without the lock,
+// return false; with it, return whatever callable(lock_t&) returns.
+template <typename ElementT, typename QueueT>
+template <typename Clock, typename Duration, typename CALLABLE>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryLockUntil(
+    const std::chrono::time_point<Clock, Duration>& until,
+    CALLABLE&& callable)
+{
+    lock_t guard(mLock, until);
+    if (guard.owns_lock())
+        return std::forward<CALLABLE>(callable)(guard);
+
+    return false;
+}
+
+
+// Caller holds 'lock'. If the queue is full, touch nothing and return false.
+template <typename ElementT, typename QueueT>
+template <typename T>
+bool LLThreadSafeQueue<ElementT, QueueT>::push_(lock_t& lock, T&& element)
+{
+    if (mStorage.size() < mCapacity)
+    {
+        mStorage.push(std::forward<T>(element));
+        lock.unlock();           // release first ...
+        mEmptyCond.notify_one(); // ... then wake one waiter blocked in a pop
+        return true;
+    }
+    return false;
+}
+
+
+template <typename ElementT, typename QueueT>
+template<typename T>
+void LLThreadSafeQueue<ElementT, QueueT>::push(T&& element)
 {
     lock_t lock1(mLock);
     while (true)
     {
+        // On the producer side, it doesn't matter whether the queue has been
+        // drained or not: the moment either end calls close(), further push()
+        // operations will fail.
         if (mClosed)
         {
             LLTHROW(LLThreadSafeQueueInterrupt());
         }
 
-        if (mStorage.size() < mCapacity)
-        {
-            mStorage.push_front(element);
-            lock1.unlock();
-            mEmptyCond.notify_one();
+        if (push_(lock1, std::forward<T>(element)))
             return;
-        }
 
         // Storage Full. Wait for signal.
         mCapacityCond.wait(lock1);
@@ -175,142 +331,225 @@ void LLThreadSafeQueue<ElementT>::pushFront(ElementT const & element)
 }
 
 
-template <typename ElementT>
-template <typename Rep, typename Period>
-bool LLThreadSafeQueue<ElementT>::tryPushFrontFor(const std::chrono::duration<Rep, Period>& timeout,
-                                                  ElementT const & element)
+template<typename ElementT, typename QueueT>
+template<typename T>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryPush(T&& element)
 {
-    // Convert duration to time_point: passing the same timeout duration to
-    // each of multiple calls is wrong.
-    auto endpoint = std::chrono::steady_clock::now() + timeout;
+    return tryLock(
+        [this, &element](lock_t& lock) // tryLock() runs this before returning, so capture by reference
+        {
+            if (mClosed)               // a closed queue accepts nothing
+                return false;
+            return push_(lock, std::forward<T>(element)); // caller's element is consumed only if pushed
+        });
+}
 
-    lock_t lock1(mLock, std::defer_lock);
-    if (!lock1.try_lock_until(endpoint))
-        return false;
 
-    while (true)
-    {
-        if (mClosed)
-        {
-            return false;
-        }
+template <typename ElementT, typename QueueT>
+template <typename Rep, typename Period, typename T>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryPushFor(
+    const std::chrono::duration<Rep, Period>& timeout,
+    T&& element)
+{
+    // Pin down one absolute deadline now: handing the same relative timeout
+    // to each of several waits would let the total exceed it.
+    const auto deadline = std::chrono::steady_clock::now() + timeout;
+    return tryPushUntil(deadline, std::forward<T>(element));
+}
 
-        if (mStorage.size() < mCapacity)
-        {
-            mStorage.push_front(element);
-            lock1.unlock();
-            mEmptyCond.notify_one();
-            return true;
-        }
 
-        // Storage Full. Wait for signal.
-        if (LLCoros::cv_status::timeout == mCapacityCond.wait_until(lock1, endpoint))
+template <typename ElementT, typename QueueT>
+template <typename Clock, typename Duration, typename T>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryPushUntil(
+    const std::chrono::time_point<Clock, Duration>& until,
+    T&& element)
+{
+    return tryLockUntil(
+        until,
+        [this, until, &element](lock_t& lock) // runs before tryLockUntil() returns, so by-reference capture is safe
         {
-            // timed out -- formally we might recheck both conditions above
-            return false;
-        }
-        // If we didn't time out, we were notified for some reason. Loop back
-        // to check.
-    }
+            while (true)
+            {
+                if (mClosed)
+                {
+                    return false;
+                }
+
+                if (push_(lock, std::forward<T>(element))) // element is consumed only when actually pushed
+                    return true;
+
+                // Storage Full. Wait for signal.
+                if (LLCoros::cv_status::timeout == mCapacityCond.wait_until(lock, until))
+                {
+                    // timed out -- formally we might recheck both conditions above
+                    return false;
+                }
+                // If we didn't time out, we were notified for some reason. Loop back
+                // to check.
+            }
+        });
 }
 
 
-template<typename ElementT>
-bool LLThreadSafeQueue<ElementT>::tryPushFront(ElementT const & element)
+// Caller holds 'lock'. Pop the head into 'element' if possible; the lock is released only when POPPED is returned.
+template <typename ElementT, typename QueueT>
+typename LLThreadSafeQueue<ElementT, QueueT>::pop_result
+LLThreadSafeQueue<ElementT, QueueT>::pop_(lock_t& lock, ElementT& element)
 {
-    lock_t lock1(mLock, std::defer_lock);
-    if (!lock1.try_lock())
-        return false;
-
-    if (mClosed)
-        return false;
+    // Nothing stored: DONE if the queue is also closed, otherwise just EMPTY.
+    if (mStorage.empty())
+        return mClosed? DONE : EMPTY;
 
-    if (mStorage.size() >= mCapacity)
-        return false;
+    // There is a head element; canPop() (overridable) decides whether it is ready.
+    if (! canPop(mStorage.front()))
+        return WAITING;
 
-    mStorage.push_front(element);
-    lock1.unlock();
-    mEmptyCond.notify_one();
-    return true;
+    // front() is the element about to pop(): move it out instead of copying (a const front() still copies)
+    element = std::move(mStorage.front());
+    mStorage.pop();
+    lock.unlock();
+    // now that we've popped, if somebody's been waiting to push, signal them
+    mCapacityCond.notify_one();
+    return POPPED;
 }
 
 
-template<typename ElementT>
-ElementT LLThreadSafeQueue<ElementT>::popBack(void)
+template<typename ElementT, typename QueueT>
+ElementT LLThreadSafeQueue<ElementT, QueueT>::pop(void)
 {
     lock_t lock1(mLock);
+    ElementT value;
     while (true)
     {
-        if (!mStorage.empty())
-        {
-            ElementT value = mStorage.back();
-            mStorage.pop_back();
-            lock1.unlock();
-            mCapacityCond.notify_one();
-            return value;
-        }
-
-        if (mClosed)
+        // On the consumer side, we always try to pop before checking mClosed
+        // so we can finish draining the queue.
+        pop_result popped = pop_(lock1, value);
+        if (popped == POPPED)
+            return value; // a returned local is moved implicitly; std::move() here would only block copy elision
+
+        // Once the queue is DONE, there will never be any more coming.
+        if (popped == DONE)
         {
             LLTHROW(LLThreadSafeQueueInterrupt());
         }
 
-        // Storage empty. Wait for signal.
+        // If we didn't pop because WAITING, i.e. canPop() returned false,
+        // then even if the producer end has been closed, there's still at
+        // least one item to drain: wait for it. Or we might be EMPTY, with
+        // the queue still open. Either way, wait for signal.
         mEmptyCond.wait(lock1);
     }
 }
 
 
-template<typename ElementT>
-bool LLThreadSafeQueue<ElementT>::tryPopBack(ElementT & element)
+template<typename ElementT, typename QueueT>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryPop(ElementT & element)
 {
-    lock_t lock1(mLock, std::defer_lock);
-    if (!lock1.try_lock())
-        return false;
+    return tryLock(
+        [this, &element](lock_t& lock)
+        {
+            // conflate EMPTY, DONE, WAITING: tryPop() behavior when the queue
+            // is closed is implemented by simple inability to push any new
+            // elements
+            return pop_(lock, element) == POPPED;
+        });
+}
 
-    // no need to check mClosed: tryPopBack() behavior when the queue is
-    // closed is implemented by simple inability to push any new elements
-    if (mStorage.empty())
-        return false;
 
-    element = mStorage.back();
-    mStorage.pop_back();
-    lock1.unlock();
-    mCapacityCond.notify_one();
-    return true;
+template <typename ElementT, typename QueueT>
+template <typename Rep, typename Period>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryPopFor(
+    const std::chrono::duration<Rep, Period>& timeout,
+    ElementT& element)
+{
+    // Pin down one absolute deadline; reusing the relative timeout for each wait would overrun it.
+    const auto deadline = std::chrono::steady_clock::now() + timeout;
+    return tryPopUntil(deadline, element);
 }
 
 
-template<typename ElementT>
-size_t LLThreadSafeQueue<ElementT>::size(void)
+template <typename ElementT, typename QueueT>
+template <typename Clock, typename Duration>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil(
+    const std::chrono::time_point<Clock, Duration>& until,
+    ElementT& element)
+{
+    // Only POPPED counts as success; EMPTY, DONE and WAITING all map to false.
+    auto attempt = [this, until, &element](lock_t& lock)
+    {
+        const pop_result outcome = tryPopUntil_(lock, until, element);
+        return outcome == POPPED;
+    };
+    return tryLockUntil(until, attempt);
+}
+
+
+// Loop behind tryPopUntil(); the caller already holds 'lock'.
+template <typename ElementT, typename QueueT>
+template <typename Clock, typename Duration>
+typename LLThreadSafeQueue<ElementT, QueueT>::pop_result
+LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil_(
+    lock_t& lock,
+    const std::chrono::time_point<Clock, Duration>& until,
+    ElementT& element)
+{
+    for (;;)
+    {
+        const pop_result outcome = pop_(lock, element);
+        switch (outcome)
+        {
+        case POPPED: // got an element
+        case DONE:   // closed and drained: nothing more will arrive
+            return outcome;
+        default:     // EMPTY or WAITING: leave the switch and wait below
+            break;
+        }
+
+        const auto woke = mEmptyCond.wait_until(lock, until);
+        if (woke == LLCoros::cv_status::timeout)
+        {
+            // Deadline passed: report the last outcome without rechecking.
+            return outcome;
+        }
+        // Notified before the deadline: go around and try pop_() again.
+    }
+}
+
+
+template<typename ElementT, typename QueueT>
+size_t LLThreadSafeQueue<ElementT, QueueT>::size(void)
 {
     lock_t lock(mLock);
     return mStorage.size();
 }
 
-template<typename ElementT>
-void LLThreadSafeQueue<ElementT>::close()
+
+template<typename ElementT, typename QueueT>
+void LLThreadSafeQueue<ElementT, QueueT>::close()
 {
     lock_t lock(mLock);
     mClosed = true;
     lock.unlock();
-    // wake up any blocked popBack() calls
+    // wake up any blocked pop() calls
     mEmptyCond.notify_all();
-    // wake up any blocked pushFront() calls
+    // wake up any blocked push() calls
     mCapacityCond.notify_all();
 }
 
-template<typename ElementT>
-bool LLThreadSafeQueue<ElementT>::isClosed()
+
+template<typename ElementT, typename QueueT>
+bool LLThreadSafeQueue<ElementT, QueueT>::isClosed()
 {
     lock_t lock(mLock);
-    return mClosed && mStorage.size() == 0;
+    return mClosed;
 }
 
-template<typename ElementT>
-LLThreadSafeQueue<ElementT>::operator bool()
+
+template<typename ElementT, typename QueueT>
+bool LLThreadSafeQueue<ElementT, QueueT>::done()
 {
-    return ! isClosed();
+    lock_t lock(mLock);
+    return mClosed && mStorage.empty();
 }
 
 #endif
diff --git a/indra/llcommon/lluuid.h b/indra/llcommon/lluuid.h
index fe7482ba29..86a396ab06 100644
--- a/indra/llcommon/lluuid.h
+++ b/indra/llcommon/lluuid.h
@@ -184,6 +184,17 @@ struct boost::hash<LLUUID>
     }
 };
 
+// Adapt boost::hash<LLUUID> to std::hash so LLUUID works as a std:: unordered key
+namespace std
+{
+    template<> struct hash<LLUUID>
+    {
+        std::size_t operator()(LLUUID const& s) const noexcept
+        {
+            return boost::hash<LLUUID>()(s); // reuse the boost specialization above
+        }
+    };
+}
 #endif
 
 
diff --git a/indra/llcommon/tests/llinstancetracker_test.cpp b/indra/llcommon/tests/llinstancetracker_test.cpp
index 9b89159625..5daa29adf4 100644
--- a/indra/llcommon/tests/llinstancetracker_test.cpp
+++ b/indra/llcommon/tests/llinstancetracker_test.cpp
@@ -90,19 +90,19 @@ namespace tut
         {
             Keyed one("one");
             ensure_equals(Keyed::instanceCount(), 1);
-            Keyed* found = Keyed::getInstance("one");
-            ensure("couldn't find stack Keyed", found);
-            ensure_equals("found wrong Keyed instance", found, &one);
+            auto found = Keyed::getInstance("one");
+            ensure("couldn't find stack Keyed", bool(found));
+            ensure_equals("found wrong Keyed instance", found.get(), &one);
             {
                 boost::scoped_ptr<Keyed> two(new Keyed("two"));
                 ensure_equals(Keyed::instanceCount(), 2);
-                Keyed* found = Keyed::getInstance("two");
-                ensure("couldn't find heap Keyed", found);
-                ensure_equals("found wrong Keyed instance", found, two.get());
+                auto found = Keyed::getInstance("two");
+                ensure("couldn't find heap Keyed", bool(found));
+                ensure_equals("found wrong Keyed instance", found.get(), two.get());
             }
             ensure_equals(Keyed::instanceCount(), 1);
         }
-        Keyed* found = Keyed::getInstance("one");
+        auto found = Keyed::getInstance("one");
         ensure("Keyed key lives too long", ! found);
         ensure_equals(Keyed::instanceCount(), 0);
     }
diff --git a/indra/llcommon/tests/threadsafeschedule_test.cpp b/indra/llcommon/tests/threadsafeschedule_test.cpp
new file mode 100644
index 0000000000..af67b9f492
--- /dev/null
+++ b/indra/llcommon/tests/threadsafeschedule_test.cpp
@@ -0,0 +1,69 @@
+/**
+ * @file   threadsafeschedule_test.cpp
+ * @author Nat Goodspeed
+ * @date   2021-10-04
+ * @brief  Test for threadsafeschedule.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+// Precompiled header
+#include "linden_common.h"
+// associated header
+#include "threadsafeschedule.h"
+// STL headers
+// std headers
+#include <chrono>
+// external library headers
+// other Linden headers
+#include "../test/lltut.h"
+
+using namespace std::literals::chrono_literals; // ms suffix
+using namespace std::literals::string_literals; // s suffix
+using Queue = LL::ThreadSafeSchedule<std::string>;
+
+/*****************************************************************************
+*   TUT
+*****************************************************************************/
+namespace tut
+{
+    struct threadsafeschedule_data
+    {
+        Queue queue;            // the ThreadSafeSchedule<std::string> under test
+    };
+    typedef test_group<threadsafeschedule_data> threadsafeschedule_group;
+    typedef threadsafeschedule_group::object object;
+    threadsafeschedule_group threadsafeschedulegrp("threadsafeschedule");
+
+    template<> template<>
+    void object::test<1>()
+    {
+        set_test_name("push");
+        // Simply calling push() a few times might result in indeterminate
+        // delivery order if the resolution of steady_clock is coarser than
+        // the real time required for each push() call. Explicitly increment
+        // the timestamp for each one -- but since we're passing explicit
+        // timestamps, push them out of order so the queue must reorder them.
+        queue.push(Queue::TimeTuple(Queue::Clock::now() + 20ms, "ghi"));
+        // Given the various push() overloads, you have to match the type
+        // exactly: conversions are ambiguous.
+        queue.push("abc"s);     // implicit timestamp: Clock::now()
+        queue.push(Queue::Clock::now() + 10ms, "def");
+        queue.close();          // closed, yet the queued items must still drain
+        auto entry = queue.pop();   // pop() returns a DataTuple (no timestamp)
+        ensure_equals("failed to pop first", std::get<0>(entry), "abc"s);
+        entry = queue.pop();
+        ensure_equals("failed to pop second", std::get<0>(entry), "def"s);
+        ensure("queue not closed", queue.isClosed());
+        ensure("queue prematurely done", ! queue.done());   // "ghi" is still queued
+        std::string s;
+        bool popped = queue.tryPopFor(1s, s);   // "ghi" is due ~20ms after its push
+        ensure("failed to pop third", popped);
+        ensure_equals("third is wrong", s, "ghi"s);
+        popped = queue.tryPop(s);
+        ensure("queue not empty", ! popped);
+        ensure("queue not done", queue.done());
+    }
+} // namespace tut
diff --git a/indra/llcommon/tests/tuple_test.cpp b/indra/llcommon/tests/tuple_test.cpp
new file mode 100644
index 0000000000..af94e2086c
--- /dev/null
+++ b/indra/llcommon/tests/tuple_test.cpp
@@ -0,0 +1,47 @@
+/**
+ * @file   tuple_test.cpp
+ * @author Nat Goodspeed
+ * @date   2021-10-04
+ * @brief  Test for tuple.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+// Precompiled header
+#include "linden_common.h"
+// associated header
+#include "tuple.h"
+// STL headers
+// std headers
+// external library headers
+// other Linden headers
+#include "../test/lltut.h"
+
+/*****************************************************************************
+*   TUT
+*****************************************************************************/
+namespace tut
+{
+    // This group needs no shared fixture state.
+    struct tuple_data {};
+    using tuple_group = test_group<tuple_data>;
+    using object = tuple_group::object;
+    tuple_group tuplegrp("tuple");
+
+    // tuple_cons() then tuple_split() must round-trip the head item and the tail.
+    template<> template<>
+    void object::test<1>()
+    {
+        set_test_name("tuple");
+        std::tuple<std::string, int> tail{ "abc", 17 };
+        std::tuple<int, std::string, int> consed{ tuple_cons(34, tail) };
+        int head;
+        std::tuple<std::string, int> rest;
+        std::tie(head, rest) = tuple_split(consed);
+        ensure_equals("tuple_car() fail", head, 34);
+        ensure_equals("tuple_cdr() (0) fail", std::get<0>(rest), "abc");
+        ensure_equals("tuple_cdr() (1) fail", std::get<1>(rest), 17);
+    }
+} // namespace tut
diff --git a/indra/llcommon/tests/workqueue_test.cpp b/indra/llcommon/tests/workqueue_test.cpp
new file mode 100644
index 0000000000..d5405400fd
--- /dev/null
+++ b/indra/llcommon/tests/workqueue_test.cpp
@@ -0,0 +1,159 @@
+/**
+ * @file   workqueue_test.cpp
+ * @author Nat Goodspeed
+ * @date   2021-10-07
+ * @brief  Test for workqueue.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+// Precompiled header
+#include "linden_common.h"
+// associated header
+#include "workqueue.h"
+// STL headers
+// std headers
+#include <chrono>
+#include <deque>
+// external library headers
+// other Linden headers
+#include "../test/lltut.h"
+#include "llcond.h"
+#include "llstring.h"
+#include "stringize.h"
+
+using namespace LL;
+using namespace std::literals::chrono_literals; // ms suffix
+using namespace std::literals::string_literals; // s suffix
+
+/*****************************************************************************
+*   TUT
+*****************************************************************************/
+namespace tut
+{
+    struct workqueue_data
+    {
+        WorkQueue queue{"queue"};   // named: tests look it up via getInstance("queue")
+    };
+    typedef test_group<workqueue_data> workqueue_group;
+    typedef workqueue_group::object object;
+    workqueue_group workqueuegrp("workqueue");
+
+    template<> template<>
+    void object::test<1>()
+    {
+        set_test_name("name");
+        ensure_equals("didn't capture name", queue.getKey(), "queue");
+        ensure("not findable", WorkQueue::getInstance("queue") == queue.getWeak().lock());
+        WorkQueue q2;   // constructed without a name: key must start with "WorkQueue"
+        ensure("has no name", LLStringUtil::startsWith(q2.getKey(), "WorkQueue"));
+    }
+
+    template<> template<>
+    void object::test<2>()
+    {
+        set_test_name("post");
+        // A plain bool captured by reference suffices only because the work
+        // item is executed on this very thread (by runUntilClose() below).
+        bool executed = false;
+        queue.post([&executed]{ executed = true; });
+        queue.close();
+        ensure("ran too soon", ! executed);
+        queue.runUntilClose();
+        ensure("didn't run", executed);
+    }
+
+    template<> template<>
+    void object::test<3>()
+    {
+        set_test_name("postEvery");
+        // record of runs: one timestamp per invocation of the work item
+        using Shared = std::deque<WorkQueue::TimePoint>;
+        // This is an example of how to share data between the originator of
+        // postEvery(work) and the work item itself, since usually a WorkQueue
+        // is used to dispatch work to a different thread. Neither of them
+        // should call any of LLCond's wait methods: you don't want to stall
+        // either the worker thread or the originating thread (conventionally
+        // main). Use LLCond or a subclass even if all you want to do is
+        // signal the work item that it can quit; consider LLOneShotCond.
+        LLCond<Shared> data;
+        auto start = WorkQueue::TimePoint::clock::now();
+        auto interval = 100ms;
+        queue.postEvery(
+            interval,
+            [&data, count = 0]
+            () mutable
+            {
+                // record the timestamp at which this instance is running
+                data.update_one(
+                    [](Shared& data)    // parameter shadows the outer LLCond 'data'
+                    {
+                        data.push_back(WorkQueue::TimePoint::clock::now());
+                    });
+                // by the 3rd call, return false to stop
+                return (++count < 3);
+            });
+        // no convenient way to close() our queue while we've got a
+        // postEvery() running, so run until we think we should have exhausted
+        // the iterations
+        queue.runFor(10*interval);
+        // Take a copy of the captured deque.
+        Shared result = data.get();
+        ensure_equals("called wrong number of times", result.size(), 3);
+        // postEvery() assumes you want the first call to happen right away.
+        // Pretend our start time was (interval) earlier than that, to make
+        // our too early/too late tests uniform for all entries.
+        start -= interval;
+        for (size_t i = 0; i < result.size(); ++i)
+        {
+            auto diff = result[i] - start;
+            start += interval;  // expected baseline for the next entry
+            try
+            {
+                ensure(STRINGIZE("call " << i << " too soon"), diff >= interval);
+                // NOTE(review): wall-clock upper bound; may be flaky on a loaded machine
+                ensure(STRINGIZE("call " << i << " too late"), diff < interval*1.5);
+            }
+            catch (const tut::failure&)
+            {
+                auto interval_ms = interval / 1ms;
+                auto diff_ms = diff / 1ms;
+                std::cerr << "interval " << interval_ms
+                          << "ms; diff " << diff_ms << "ms" << std::endl;
+                throw;
+            }
+        }
+    }
+
+    template<> template<>
+    void object::test<4>()
+    {
+        set_test_name("postTo");
+        WorkQueue origin("main");
+        auto target = WorkQueue::getInstance("queue");
+        int answer = 0;
+        // The *callback* (last argument) may bind a reference to a local of
+        // the posting thread: postTo() arranges for it to run back on the
+        // queue that posted the work, not on the target queue.
+        origin.postTo(
+            target,
+            []{ return 17; },
+            [&answer](int value){ answer = value; });
+        // running the work on the target should post the callback to origin
+        target->runOne();
+        // running origin should then deliver the result to the callback
+        origin.runOne();
+        ensure_equals("failed to run int callback", answer, 17);
+
+        // postTo() copes with arbitrary return types, too
+        std::string text;
+        origin.postTo(
+            target,
+            []{ return "abc"s; },
+            [&text](const std::string& str){ text = str; });
+        target->runPending();
+        origin.runPending();
+        ensure_equals("failed to run string callback", text, "abc");
+    }
+} // namespace tut
diff --git a/indra/llcommon/threadpool.cpp b/indra/llcommon/threadpool.cpp
new file mode 100644
index 0000000000..aa7d4179a2
--- /dev/null
+++ b/indra/llcommon/threadpool.cpp
@@ -0,0 +1,75 @@
+/**
+ * @file   threadpool.cpp
+ * @author Nat Goodspeed
+ * @date   2021-10-21
+ * @brief  Implementation for threadpool.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+// Precompiled header
+#include "linden_common.h"
+// associated header
+#include "threadpool.h"
+// STL headers
+// std headers
+// external library headers
+// other Linden headers
+#include "llerror.h"
+#include "llevents.h"
+#include "stringize.h"
+
+LL::ThreadPool::ThreadPool(const std::string& name, size_t threads):
+    mQueue(name),
+    mName("ThreadPool:" + name)
+{
+    // Start the workers, each labeled "<mName>:<1-based index>/<count>".
+    for (size_t i = 0; i < threads; ++i)
+    {
+        std::string tname{ STRINGIZE(mName << ':' << (i+1) << '/' << threads) };
+        mThreads.emplace_back(tname, [this, tname](){ run(tname); });
+    }
+    // When "LLApp" reports any status but "running", close and join.
+    LLEventPumps::instance().obtain("LLApp").listen(
+        mName,
+        [this](const LLSD& stat)
+        {
+            std::string status(stat["status"]);
+            if (status != "running")
+            {
+                // viewer is starting shutdown -- proclaim the end is nigh!
+                LL_DEBUGS("ThreadPool") << mName << " saw " << status << LL_ENDL;
+                close();
+            }
+            return false;
+        });
+}
+
+LL::ThreadPool::~ThreadPool()
+{
+    close();    // NOTE(review): the "LLApp" listener bound to 'this' in the ctor is not disconnected here
+}
+
+void LL::ThreadPool::close()
+{
+    // Join even if the queue was already closed elsewhere (it can be looked
+    // up by name): destroying a joinable std::thread calls std::terminate().
+    LL_DEBUGS("ThreadPool") << mName << " closing queue and joining threads" << LL_ENDL;
+    if (! mQueue.isClosed()) mQueue.close();
+    for (auto& pair: mThreads)
+    {
+        if (! pair.second.joinable()) continue; // reaped by an earlier close()
+        LL_DEBUGS("ThreadPool") << mName << " waiting on thread " << pair.first << LL_ENDL;
+        pair.second.join();
+    }
+    LL_DEBUGS("ThreadPool") << mName << " shutdown complete" << LL_ENDL;
+}
+
+void LL::ThreadPool::run(const std::string& name)
+{
+    LL_DEBUGS("ThreadPool") << name << " starting" << LL_ENDL;
+    mQueue.runUntilClose(); // worker body; close() joins this thread after closing mQueue
+    LL_DEBUGS("ThreadPool") << name << " stopping" << LL_ENDL;
+}
diff --git a/indra/llcommon/threadpool.h b/indra/llcommon/threadpool.h
new file mode 100644
index 0000000000..8f3c8514b5
--- /dev/null
+++ b/indra/llcommon/threadpool.h
@@ -0,0 +1,46 @@
+/**
+ * @file   threadpool.h
+ * @author Nat Goodspeed
+ * @date   2021-10-21
+ * @brief  ThreadPool configures a WorkQueue along with a pool of threads to
+ *         service it.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_THREADPOOL_H)
+#define LL_THREADPOOL_H
+
+#include "workqueue.h"
+#include <string>
+#include <thread>
+#include <utility>                  // std::pair
+#include <vector>
+
+namespace LL
+{
+
+    class ThreadPool
+    {
+    public:
+        /**
+         * Start 'threads' worker threads servicing a WorkQueue constructed
+         * with 'name'; that name can be used to look up the WorkQueue.
+         */
+        ThreadPool(const std::string& name, size_t threads=1);
+        ~ThreadPool();                      // calls close()
+        void close();                       // close mQueue and join the workers
+
+    private:
+        void run(const std::string& name);  // body of each worker thread
+
+        WorkQueue mQueue;
+        std::string mName;                  // "ThreadPool:" + name
+        std::vector<std::pair<std::string, std::thread>> mThreads; // (label, thread)
+    };
+
+} // namespace LL
+
+#endif /* ! defined(LL_THREADPOOL_H) */
diff --git a/indra/llcommon/threadsafeschedule.h b/indra/llcommon/threadsafeschedule.h
new file mode 100644
index 0000000000..c8ad23532b
--- /dev/null
+++ b/indra/llcommon/threadsafeschedule.h
@@ -0,0 +1,373 @@
+/**
+ * @file   threadsafeschedule.h
+ * @author Nat Goodspeed
+ * @date   2021-10-02
+ * @brief  ThreadSafeSchedule is an ordered queue in which every item has an
+ *         associated timestamp.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_THREADSAFESCHEDULE_H)
+#define LL_THREADSAFESCHEDULE_H
+
+#include "chrono.h"
+#include "llexception.h"
+#include "llthreadsafequeue.h"
+#include "tuple.h"
+#include <chrono>
+#include <tuple>  
+
+namespace LL
+{
+    namespace ThreadSafeSchedulePrivate
+    {
+        using TimePoint = std::chrono::steady_clock::time_point;
+        // Bundle consumer's data with a TimePoint to order items by timestamp.
+        template <typename... Args>
+        using TimestampedTuple = std::tuple<TimePoint, Args...>;
+
+        // comparison functor for TimestampedTuples -- see TimedQueue comments
+        struct ReverseTupleOrder
+        {
+            template <typename Tuple>
+            bool operator()(const Tuple& left, const Tuple& right) const
+            {
+                return std::get<0>(left) > std::get<0>(right);
+            }
+        };
+
+        template <typename... Args>
+        using TimedQueue = PriorityQueueAdapter<
+            TimestampedTuple<Args...>,
+            // std::vector is the default storage for std::priority_queue,
+            // have to restate to specify comparison template parameter
+            std::vector<TimestampedTuple<Args...>>,
+            // std::priority_queue uses a counterintuitive comparison
+            // behavior: the default std::less comparator is used to present
+            // the *highest* value as top(). So to sort by earliest timestamp,
+            // we must invert by using >.
+            ReverseTupleOrder>;
+    } // namespace ThreadSafeSchedulePrivate
+
+    /**
+     * ThreadSafeSchedule is an ordered LLThreadSafeQueue in which every item
+     * is given an associated timestamp. That is, TimePoint is implicitly
+     * prepended to the std::tuple with the specified types.
+     *
+     * Items are popped in increasing chronological order. Moreover, any item
+     * with a timestamp in the future is held back until
+     * std::chrono::steady_clock reaches that timestamp.
+     */
+    template <typename... Args>
+    class ThreadSafeSchedule:
+        public LLThreadSafeQueue<ThreadSafeSchedulePrivate::TimestampedTuple<Args...>,
+                                 ThreadSafeSchedulePrivate::TimedQueue<Args...>>
+    {
+    public:
+        using DataTuple = std::tuple<Args...>;
+        using TimeTuple = ThreadSafeSchedulePrivate::TimestampedTuple<Args...>;
+
+    private:
+        using super = LLThreadSafeQueue<TimeTuple, ThreadSafeSchedulePrivate::TimedQueue<Args...>>;
+        using lock_t = typename super::lock_t;
+        // VS 2017 needs this due to a bug:
+        // https://developercommunity.visualstudio.com/t/cannot-access-protected-enumerator-of-enclosing-cl/203430
+        enum pop_result { EMPTY=super::EMPTY, DONE=super::DONE, WAITING=super::WAITING, POPPED=super::POPPED };
+
+    public:
+        using Closed = LLThreadSafeQueueInterrupt;
+        using TimePoint = ThreadSafeSchedulePrivate::TimePoint;
+        using Clock = TimePoint::clock;
+
+        ThreadSafeSchedule(U32 capacity=1024):
+            super(capacity)     // capacity is passed straight to LLThreadSafeQueue
+        {}
+
+        /*----------------------------- push() -----------------------------*/
+        /// explicitly pass TimeTuple
+        using super::push;
+
+        /// pass DataTuple with implicit now
+        // This could be ambiguous for Args with a single type. Unfortunately
+        // we can't enable_if an individual method with a condition based on
+        // the *class* template arguments, only on that method's template
+        // arguments. We could specialize this class for the single-Args case;
+        // we could minimize redundancy by breaking out a common base class...
+        void push(const DataTuple& tuple)
+        {
+            push(tuple_cons(Clock::now(), tuple));
+        }
+
+        /// individually pass each component of the TimeTuple
+        void push(const TimePoint& time, Args&&... args)
+        {
+            push(TimeTuple(time, std::forward<Args>(args)...));
+        }
+
+        /// individually pass every component except the TimePoint (implies now)
+        // Ambiguous if the first Args type is itself TimePoint; rather than
+        // disambiguate here, the caller can explicitly construct a DataTuple
+        // and call that overload. NB: Args&& is not a forwarding reference
+        // (Args is fixed by the class), so non-reference Args bind only rvalues.
+        void push(Args&&... args)
+        {
+            push(Clock::now(), std::forward<Args>(args)...);
+        }
+
+        /*--------------------------- tryPush() ----------------------------*/
+        /// explicit TimeTuple (base-class overloads)
+        using super::tryPush;
+
+        /// DataTuple with implicit now: timestamp is Clock::now()
+        bool tryPush(const DataTuple& tuple)
+        {
+            return tryPush(tuple_cons(Clock::now(), tuple));
+        }
+
+        /// individually pass components: explicit timestamp, then each Args value
+        bool tryPush(const TimePoint& time, Args&&... args)
+        {
+            return tryPush(TimeTuple(time, std::forward<Args>(args)...));
+        }
+
+        /// individually pass components with implicit now
+        bool tryPush(Args&&... args)
+        {
+            return tryPush(Clock::now(), std::forward<Args>(args)...);
+        }
+
+        /*-------------------------- tryPushFor() --------------------------*/
+        /// explicit TimeTuple (base-class overloads)
+        using super::tryPushFor;
+
+        /// DataTuple with implicit now: timestamp is Clock::now()
+        template <typename Rep, typename Period>
+        bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
+                        const DataTuple& tuple)
+        {
+            return tryPushFor(timeout, tuple_cons(Clock::now(), tuple));
+        }
+
+        /// individually pass components; 'timeout' must be forwarded along
+        template <typename Rep, typename Period>
+        bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
+                        const TimePoint& time, Args&&... args)
+        {
+            return tryPushFor(timeout, TimeTuple(time, std::forward<Args>(args)...));
+        }
+
+        /// individually pass components with implicit now (same 'timeout')
+        template <typename Rep, typename Period>
+        bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
+                        Args&&... args)
+        {
+            return tryPushFor(timeout, Clock::now(), std::forward<Args>(args)...);
+        }
+
+        /*------------------------- tryPushUntil() -------------------------*/
+        /// explicit TimeTuple (base-class overloads)
+        using super::tryPushUntil;
+
+        /// DataTuple with implicit now (taken from TimePoint::clock, not until's Clock)
+        template <typename Clock, typename Duration>
+        bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
+                          const DataTuple& tuple)
+        {
+            return tryPushUntil(until, tuple_cons(TimePoint::clock::now(), tuple));
+        }
+
+        /// individually pass components
+        template <typename Clock, typename Duration>
+        bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
+                          const TimePoint& time, Args&&... args)
+        {
+            return tryPushUntil(until, TimeTuple(time, std::forward<Args>(args)...));
+        }
+
+        /// individually pass components with implicit now (again from TimePoint::clock)
+        template <typename Clock, typename Duration>
+        bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
+                          Args&&... args)
+        {
+            return tryPushUntil(until, TimePoint::clock::now(), std::forward<Args>(args)...);
+        }
+
+        /*----------------------------- pop() ------------------------------*/
+        // Our consumer may or may not care about the timestamp associated
+        // with each popped item, so we allow retrieving either DataTuple or
+        // TimeTuple. One potential use would be to observe, and possibly
+        // adjust for, the time lag between the item time and the actual
+        // current time.
+
+        /// pop DataTuple by value; popWithTime() throws Closed once the queue is DONE
+        // It would be great to notice when sizeof...(Args) == 1 and directly
+        // return the first (only) value, instead of making pop()'s caller
+        // call std::get<0>(value). See push(DataTuple) remarks for why we
+        // haven't yet jumped through those hoops.
+        DataTuple pop()
+        {
+            return tuple_cdr(popWithTime());
+        }
+
+        /// pop TimeTuple by value; throws LLThreadSafeQueueInterrupt once DONE
+        TimeTuple popWithTime()
+        {
+            lock_t lock(super::mLock);
+            // We can't just sit around waiting forever, given that there may
+            // be items in the queue that are not yet ready but will *become*
+            // ready in the near future. So in fact, with this class, every
+            // pop() becomes a tryPopUntil(), constrained to the timestamp of
+            // the head item. It almost doesn't matter what we specify for the
+            // caller's time constraint -- all we really care about is the
+            // head item's timestamp. Since pop() and popWithTime() are
+            // defined to wait until either an item becomes available or the
+            // queue is closed, loop until one of those things happens. The
+            // constraint we pass just determines how often we'll loop while
+            // waiting.
+            TimeTuple tt;
+            while (true)
+            {
+                // Pick a point suitably far into the future.
+                TimePoint until = TimePoint::clock::now() + std::chrono::hours(24);
+                pop_result popped = tryPopUntil_(lock, until, tt);
+                if (popped == POPPED)
+                    return tt;  // plain return: moved implicitly, copy elision allowed
+
+                // DONE: throw, just as super::pop() does
+                if (popped == DONE)
+                {
+                    LLTHROW(LLThreadSafeQueueInterrupt());
+                }
+                // WAITING: we've still got items to drain.
+                // EMPTY: not closed, so it's worth waiting for more items.
+                // Either way, loop back to wait.
+            }
+        }
+
+        // We can use tryPop(TimeTuple&) just as it stands; the only behavior
+        // difference is in our canPop() override method.
+        using super::tryPop;
+
+        /// tryPop(DataTuple&): as super::tryPop(), minus the timestamp
+        bool tryPop(DataTuple& tuple)
+        {
+            TimeTuple stamped;
+            const bool popped = super::tryPop(stamped);
+            if (popped)
+                tuple = tuple_cdr(std::move(stamped));
+            return popped;
+        }
+
+        /// single-Args convenience: deliver the lone data value directly
+        bool tryPop(typename std::tuple_element<1, TimeTuple>::type& value)
+        {
+            TimeTuple stamped;
+            const bool popped = super::tryPop(stamped);
+            if (popped)
+                value = std::get<1>(std::move(stamped));
+            return popped;
+        }
+
+        /// tryPopFor(): forwards to tryPopUntil(Clock::now() + timeout, tuple)
+        template <typename Rep, typename Period, typename Tuple>
+        bool tryPopFor(const std::chrono::duration<Rep, Period>& timeout, Tuple& tuple)
+        {
+            // It's important to use OUR tryPopUntil() implementation, rather
+            // than delegating immediately to our base class.
+            return tryPopUntil(Clock::now() + timeout, tuple);
+        }
+
+        /// tryPopUntil(TimeTuple&): the locked lambda reports true only for POPPED
+        template <typename Clock, typename Duration>
+        bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
+                         TimeTuple& tuple)
+        {
+            // super::tryPopUntil() wakes up when an item becomes available or
+            // we hit 'until', whichever comes first. Thing is, the current
+            // head of the queue could become ready sooner than either of
+            // those events, and we need to deliver it as soon as it does.
+            // Don't wait past the TimePoint of the head item.
+            // Naturally, lock the queue before peeking at mStorage.
+            return super::tryLockUntil(
+                until,
+                [this, until, &tuple](lock_t& lock)
+                {
+                    // Use our time_point_cast to allow for 'until' that's a
+                    // time_point type other than TimePoint.
+                    return POPPED ==
+                        tryPopUntil_(lock, LL::time_point_cast<TimePoint>(until), tuple);
+                });
+        }
+
+        pop_result tryPopUntil_(lock_t& lock, const TimePoint& until, TimeTuple& tuple)
+        {
+            pop_result popped;
+            TimePoint adjusted;
+            do
+            {
+                // Wake at the head item's timestamp when it precedes 'until';
+                // recomputed each pass, since the head can change between waits.
+                adjusted = until;
+                if (! super::mStorage.empty() && std::get<0>(super::mStorage.front()) < until)
+                {
+                    adjusted = std::get<0>(super::mStorage.front());
+                }
+                popped = pop_result(super::tryPopUntil_(lock, adjusted, tuple));
+                // WAITING after waiting only for the head item: recheck it.
+                // WAITING at the caller's own limit: return, don't busy-spin.
+            } while (popped == WAITING && adjusted < until);
+            return popped;
+        }
+
+        /// tryPopUntil(DataTuple&): as the TimeTuple overload, minus the timestamp
+        template <typename Clock, typename Duration>
+        bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
+                         DataTuple& tuple)
+        {
+            TimeTuple stamped;
+            const bool popped = tryPopUntil(until, stamped);
+            if (popped)
+                tuple = tuple_cdr(std::move(stamped));
+            return popped;
+        }
+
+        /// single-Args convenience: deliver the lone data value directly
+        template <typename Clock, typename Duration>
+        bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
+                         typename std::tuple_element<1, TimeTuple>::type& value)
+        {
+            TimeTuple stamped;
+            const bool popped = tryPopUntil(until, stamped);
+            if (popped)
+                value = std::get<1>(std::move(stamped));
+            return popped;
+        }
+
+        /*------------------------------ etc. ------------------------------*/
+        // We can't hide items that aren't yet ready because we can't traverse
+        // the underlying priority_queue: it has no iterators, only top(). So
+        // a consumer could observe size() > 0 and yet tryPop() returns false.
+        // Shrug, in a multi-consumer scenario that would be expected behavior.
+        using super::size;
+        // open/closed state
+        using super::close;
+        using super::isClosed;
+        using super::done;
+
+    private:
+        // Base-class pop_() consults this hook each time it considers
+        // delivering the current head element.
+        bool canPop(const TimeTuple& head) const override
+        {
+            // Ready only once the clock has reached the item's timestamp
+            // (no slop is added for overhead).
+            return ! (Clock::now() < std::get<0>(head));
+        }
+    };
+
+} // namespace LL
+
+#endif /* ! defined(LL_THREADSAFESCHEDULE_H) */
diff --git a/indra/llcommon/timing.cpp b/indra/llcommon/timing.cpp
deleted file mode 100644
index c2dc695ef3..0000000000
--- a/indra/llcommon/timing.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-/** 
- * @file timing.cpp
- * @brief This file will be deprecated in the future.
- *
- * $LicenseInfo:firstyear=2000&license=viewerlgpl$
- * Second Life Viewer Source Code
- * Copyright (C) 2010, Linden Research, Inc.
- * 
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License only.
- * 
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- * 
- * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
- * $/LicenseInfo$
- */
diff --git a/indra/llcommon/tuple.h b/indra/llcommon/tuple.h
new file mode 100644
index 0000000000..bfe7e3c2ba
--- /dev/null
+++ b/indra/llcommon/tuple.h
@@ -0,0 +1,84 @@
+/**
+ * @file   tuple.h
+ * @author Nat Goodspeed
+ * @date   2021-10-04
+ * @brief  A couple tuple utilities
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_TUPLE_H)
+#define LL_TUPLE_H
+
+#include <tuple>
+#include <type_traits>              // std::remove_reference
+#include <utility>                  // std::pair
+
+/**
+ * tuple_cons() behaves like LISP cons: it uses std::tuple_cat() to prepend a
+ * new item 'first' of arbitrary type to an existing std::tuple 'rest'.
+ */
+template <typename First, typename... Rest, typename Tuple_=std::tuple<Rest...>>
+auto tuple_cons(First&& first, Tuple_&& rest)
+{
+    // Wrap 'first' in a one-element std::tuple<First> -- note that First is a
+    // reference type when 'first' is an lvalue -- and let tuple_cat() do the rest.
+    return std::tuple_cat(std::tuple<First>(std::forward<First>(first)),
+                          std::forward<Tuple_>(rest));
+}
+
+/**
+ * tuple_car() behaves like LISP car: it extracts the first item from a
+ * std::tuple, returning it by value (plain 'auto' return type).
+ */
+template <typename... Args, typename Tuple_=std::tuple<Args...>>
+auto tuple_car(Tuple_&& tuple)
+{
+    return std::get<0>(std::forward<Tuple_>(tuple));
+}
+
+/**
+ * tuple_cdr() behaves like LISP cdr: it returns a new tuple containing
+ * everything BUT the first item. tuple_cdr_() is its implementation helper.
+ */
+// derived from https://stackoverflow.com/a/24046437
+template <typename Tuple, std::size_t... Indices>
+auto tuple_cdr_(Tuple&& tuple, const std::index_sequence<Indices...>)
+{
+    // Indices spans [0..N-1): get<Indices+1> picks items [1..N); make_tuple() stores decayed copies
+    return std::make_tuple(std::get<Indices+1u>(std::forward<Tuple>(tuple))...);
+}
+
+template <typename Tuple>
+auto tuple_cdr(Tuple&& tuple)
+{
+    // tuple_size doesn't like reference types, so strip any reference first
+    using BareTuple = typename std::remove_reference<Tuple>::type;
+    // The helper function wants an index sequence one item shorter than the
+    // tuple, since the first element is the one being dropped.
+    using TailIndices =
+        std::make_index_sequence<std::tuple_size<BareTuple>::value - 1u>;
+
+    // Forward the tuple itself untouched; TailIndices() is only a type tag.
+    return tuple_cdr_(std::forward<Tuple>(tuple), TailIndices());
+}
+
+/**
+ * tuple_split(), the opposite of tuple_cons(), has no direct analog in LISP.
+ * It returns a std::pair of (tuple_car(tuple), tuple_cdr(tuple)). We could
+ * call this function tuple_car_cdr(), or tuple_slice() or some such. But
+ * tuple_split() feels more descriptive.
+ */
+template <typename... Args, typename Tuple_=std::tuple<Args...>>
+auto tuple_split(Tuple_&& tuple)
+{
+    // Forwarding 'tuple' twice is acceptable even when it holds move-only
+    // items: tuple_car() applies std::get() only to item 0 and tuple_cdr()
+    // only to items [1..N), so each item is extracted exactly once.
+    return std::make_pair(tuple_car(std::forward<Tuple_>(tuple)),
+                          tuple_cdr(std::forward<Tuple_>(tuple)));
+}
+
+#endif /* ! defined(LL_TUPLE_H) */
diff --git a/indra/llcommon/workqueue.cpp b/indra/llcommon/workqueue.cpp
new file mode 100644
index 0000000000..114aeea1f3
--- /dev/null
+++ b/indra/llcommon/workqueue.cpp
@@ -0,0 +1,138 @@
+/**
+ * @file   workqueue.cpp
+ * @author Nat Goodspeed
+ * @date   2021-10-06
+ * @brief  Implementation for WorkQueue.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+// Precompiled header
+#include "linden_common.h"
+// associated header
+#include "workqueue.h"
+// STL headers
+// std headers
+// external library headers
+// other Linden headers
+#include "llcoros.h"
+#include LLCOROS_MUTEX_HEADER
+#include "llerror.h"
+#include "llexception.h"
+#include "stringize.h"
+
+using Mutex = LLCoros::Mutex;
+using Lock  = LLCoros::LockType;
+
+LL::WorkQueue::WorkQueue(const std::string& name):
+    super(makeName(name))
+{
+    // TODO: register for "LLApp" events so we can implicitly close() on
+    // viewer shutdown.
+}
+
+void LL::WorkQueue::close()
+{
+    mQueue.close();
+}
+
+bool LL::WorkQueue::isClosed()
+{
+    return mQueue.isClosed();
+}
+
+bool LL::WorkQueue::done()
+{
+    return mQueue.done();
+}
+
+void LL::WorkQueue::runUntilClose()
+{
+    try
+    {
+        // Serve work items forever; only a Queue::Closed exception ends this.
+        while (true)
+            callWork(mQueue.pop());
+    }
+    catch (const Queue::Closed&)
+    {
+        // Closed is the expected way out of the loop above: simply return.
+    }
+}
+
+bool LL::WorkQueue::runPending()
+{
+    // Run every item tryPop() is currently willing to deliver; the return
+    // value tells the caller whether the queue is still not done().
+    for (Work work; mQueue.tryPop(work); )
+        callWork(work);
+    return ! mQueue.done();
+}
+
+bool LL::WorkQueue::runOne()
+{
+    // Run at most one item: nothing is called if tryPop() delivers none.
+    Work work;
+    if (mQueue.tryPop(work))
+        callWork(work);
+    // true as long as the queue is not yet done()
+    return ! mQueue.done();
+}
+
+bool LL::WorkQueue::runUntil(const TimePoint& until)
+{
+    // Should we subtract some slop to allow for typical Work execution time?
+    // How much slop?
+    Work work;
+    // Keep going only while time remains AND tryPopUntil() delivers work.
+    while (TimePoint::clock::now() < until && mQueue.tryPopUntil(until, work))
+        callWork(work);
+    // true as long as the queue is not yet done()
+    return ! mQueue.done();
+}
+
+std::string LL::WorkQueue::makeName(const std::string& name)
+{
+    if (! name.empty())
+        return name;
+    // No name supplied: synthesize "WorkQueue<N>" from a shared static counter.
+    static U32 discriminator = 0;
+    static Mutex mutex;
+    U32 num;
+    {
+        // Protect discriminator from concurrent access by different threads.
+        // It can't be thread_local, else two racing threads will come up with
+        // the same name.
+        Lock lk(mutex);
+        num = discriminator++;
+    }
+    return STRINGIZE("WorkQueue" << num);
+}
+
+void LL::WorkQueue::callWork(const Queue::DataTuple& work)
+{
+    // ThreadSafeSchedule::pop() always delivers a tuple, even when
+    // there's only one data field per item, as for us.
+    callWork(std::get<0>(work));
+}
+
+void LL::WorkQueue::callWork(const Work& work)
+{
+    try
+    {
+        work();
+    }
+    catch (...)
+    {
+        // No matter what goes wrong with any individual work item, the worker
+        // thread must go on! Log our own instance name with the exception.
+        LOG_UNHANDLED_EXCEPTION(getKey());
+    }
+}
+
+void LL::WorkQueue::error(const std::string& msg)
+{
+    LL_ERRS("WorkQueue") << msg << LL_ENDL;
+}
diff --git a/indra/llcommon/workqueue.h b/indra/llcommon/workqueue.h
new file mode 100644
index 0000000000..cfae2019dc
--- /dev/null
+++ b/indra/llcommon/workqueue.h
@@ -0,0 +1,334 @@
+/**
+ * @file   workqueue.h
+ * @author Nat Goodspeed
+ * @date   2021-09-30
+ * @brief  Queue used for inter-thread work passing.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_WORKQUEUE_H)
+#define LL_WORKQUEUE_H
+
+#include "llinstancetracker.h"
+#include "threadsafeschedule.h"
+#include <chrono>
+#include <functional>               // std::function
+#include <queue>
+#include <string>
+#include <utility>                  // std::pair
+#include <vector>
+
+namespace LL
+{
+    /**
+     * A typical WorkQueue has a string name that can be used to find it.
+     */
+    class WorkQueue: public LLInstanceTracker<WorkQueue, std::string>
+    {
+    private:
+        using super = LLInstanceTracker<WorkQueue, std::string>;
+
+    public:
+        using Work = std::function<void()>;
+
+    private:
+        using Queue = ThreadSafeSchedule<Work>;
+        // helper for postEvery()
+        template <typename Rep, typename Period, typename CALLABLE>
+        class BackJack;
+
+    public:
+        using TimePoint = Queue::TimePoint;
+        using TimedWork = Queue::TimeTuple;
+        using Closed    = Queue::Closed;
+
+        /**
+         * You may omit the WorkQueue name, in which case a unique name is
+         * synthesized; for practical purposes that makes it anonymous.
+         */
+        WorkQueue(const std::string& name = std::string());
+
+        /**
+         * Since the point of WorkQueue is to pass work to some other worker
+         * thread(s) asynchronously, it's important that the WorkQueue continue
+         * to exist until the worker thread(s) have drained it. To communicate
+         * that it's time for them to quit, close() the queue.
+         */
+        void close();
+
+        /// producer end: are we prevented from pushing any additional items?
+        bool isClosed();
+        /// consumer end: are we done, is the queue entirely drained?
+        bool done();
+
+        /*---------------------- fire and forget API -----------------------*/
+
+        /// fire-and-forget, but at a particular (future?) time
+        template <typename CALLABLE>
+        void post(const TimePoint& time, CALLABLE&& callable)
+        {
+            // Defer reifying an arbitrary CALLABLE (into Work) until here, so
+            // other methods avoid multiple levels of std::function indirection.
+            // Forward, don't move: an lvalue callable is copied, not gutted.
+            mQueue.push(TimedWork(time, std::forward<CALLABLE>(callable)));
+        }
+
+        /// fire-and-forget: schedule callable with a timestamp of 'now'
+        template <typename CALLABLE>
+        void post(CALLABLE&& callable)
+        {
+            // We use TimePoint::clock::now() instead of TimePoint's
+            // representation of the epoch because this WorkQueue may contain
+            // a mix of past-due TimedWork items and TimedWork items scheduled
+            // for the future. Forward (not move) so lvalue callables survive.
+            post(TimePoint::clock::now(), std::forward<CALLABLE>(callable));
+        }
+
+        /**
+         * Launch a callable returning bool that will trigger repeatedly at
+         * specified interval, until the callable returns false.
+         *
+         * If you need to signal that callable from outside, DO NOT bind a
+         * reference to a simple bool! That's not thread-safe. Instead, bind
+         * an LLCond variant, e.g. LLOneShotCond or LLBoolCond.
+         */
+        template <typename Rep, typename Period, typename CALLABLE>
+        void postEvery(const std::chrono::duration<Rep, Period>& interval,
+                       CALLABLE&& callable);
+
+        /*------------------------- handshake API --------------------------*/
+
+        /**
+         * Post work to another WorkQueue to be run at a specified time,
+         * requesting a specific callback to be run on this WorkQueue on
+         * completion. callable's (non-void) result is passed to callback.
+         *
+         * Returns true if able to post, false if the other WorkQueue is
+         * inaccessible.
+         */
+        // Apparently some Microsoft header file defines a macro CALLBACK? The
+        // natural template argument name CALLBACK produces very weird Visual
+        // Studio compile errors that seem utterly unrelated to this source
+        // code.
+        template <typename CALLABLE, typename FOLLOWUP>
+        bool postTo(WorkQueue::weak_t target,
+                    const TimePoint& time, CALLABLE&& callable, FOLLOWUP&& callback)
+        {
+            // We're being asked to post to the WorkQueue at target.
+            // target is a weak_ptr: have to lock it to check it.
+            auto tptr = target.lock();
+            if (! tptr)
+                // can't post() if the target WorkQueue has been destroyed
+                return false;
+
+            // Here we believe target WorkQueue still exists. Post to it a lambda
+            // packaging our callable and callback (forwarded: lvalues copied,
+            // rvalues moved) plus a weak_ptr to this originating WorkQueue.
+            tptr->post(
+                time,
+                [reply = super::getWeak(),
+                 callable = std::forward<CALLABLE>(callable),
+                 callback = std::forward<FOLLOWUP>(callback)]
+                ()
+                {
+                    // Call the callable in any case -- but to minimize
+                    // copying the result, immediately bind it into a reply
+                    // lambda. The reply lambda also binds the original
+                    // callback, so that when we, the originating WorkQueue,
+                    // finally receive and process the reply lambda, we'll
+                    // call the bound callback with the bound result -- on the
+                    // same thread that originally called postTo().
+                    auto rlambda =
+                        [result = callable(),
+                         callback = std::move(callback)]
+                        ()
+                        { callback(std::move(result)); };
+                    // Check if this originating WorkQueue still exists.
+                    // Remember, the outer lambda is now running on a thread
+                    // servicing the target WorkQueue, and real time has
+                    // elapsed since postTo()'s tptr->post() call.
+                    // reply is a weak_ptr: have to lock it to check it.
+                    auto rptr = reply.lock();
+                    if (rptr)
+                    {
+                        // Only post reply lambda if the originating WorkQueue
+                        // still exists. If not -- who would we tell? Log it?
+                        try
+                        {
+                            rptr->post(std::move(rlambda));
+                        }
+                        catch (const Closed&)
+                        {
+                            // Originating WorkQueue might still exist, but
+                            // might be Closed. Same thing: just discard the
+                            // callback.
+                        }
+                    }
+                });
+            // looks like we were able to post()
+            return true;
+        }
+
+        /**
+         * Post work to another WorkQueue, requesting a specific callback to
+         * be run on this WorkQueue on completion. Same as the timed postTo()
+         * overload with a time of 'now'; arguments are forwarded, not moved.
+         *
+         * Returns true if able to post, false if the other WorkQueue is
+         * inaccessible.
+         */
+        template <typename CALLABLE, typename FOLLOWUP>
+        bool postTo(WorkQueue::weak_t target,
+                    CALLABLE&& callable, FOLLOWUP&& callback)
+        {
+            return postTo(target, TimePoint::clock::now(), std::forward<CALLABLE>(callable), std::forward<FOLLOWUP>(callback));
+        }
+
+        /*--------------------------- worker API ---------------------------*/
+
+        /**
+         * runUntilClose() pulls TimedWork items off this WorkQueue until the
+         * queue is closed, at which point it returns. This would be the
+         * typical entry point for a simple worker thread.
+         */
+        void runUntilClose();
+
+        /**
+         * runPending() runs all TimedWork items that are ready to run. It
+         * returns true if the queue remains open, false if the queue has been
+         * closed. This could be used by a thread whose primary purpose is to
+         * serve the queue, but also wants to do other things with its idle time.
+         */
+        bool runPending();
+
+        /**
+         * runOne() runs at most one ready TimedWork item -- zero if none are
+         * ready. It returns true if the queue remains open, false if the
+         * queue has been closed.
+         */
+        bool runOne();
+
+        /**
+         * runFor() runs a subset of ready TimedWork items, until the
+         * timeslice has been exceeded. It returns true if the queue remains
+         * open, false if the queue has been closed. This could be used by a
+         * busy main thread to lend a bounded few CPU cycles to this WorkQueue
+         * without risking the WorkQueue blowing out the length of any one
+         * frame.
+         */
+        template <typename Rep, typename Period>
+        bool runFor(const std::chrono::duration<Rep, Period>& timeslice)
+        {
+            return runUntil(TimePoint::clock::now() + timeslice);
+        }
+
+        /**
+         * runUntil() is just like runFor(), only with a specific end time
+         * instead of a timeslice duration.
+         */
+        bool runUntil(const TimePoint& until);
+
+    private:
+        static void error(const std::string& msg);
+        static std::string makeName(const std::string& name);
+        void callWork(const Queue::DataTuple& work);
+        void callWork(const Work& work);
+        Queue mQueue;
+    };
+
+    /**
+     * BackJack is, in effect, a hand-rolled lambda, binding a WorkQueue, a
+     * CALLABLE that returns bool, a TimePoint and an interval at which to
+     * relaunch it. As long as the callable continues returning true, BackJack
+     * keeps resubmitting it to the target WorkQueue.
+     */
+    // Why is BackJack a class and not a lambda? Because, unlike a lambda, a
+    // class method gets its own 'this' pointer -- which we need to resubmit
+    // the whole BackJack callable.
+    template <typename Rep, typename Period, typename CALLABLE>
+    class WorkQueue::BackJack
+    {
+    public:
+        // bind the desired data
+        BackJack(WorkQueue::weak_t target,
+                 const WorkQueue::TimePoint& start,
+                 const std::chrono::duration<Rep, Period>& interval,
+                 CALLABLE&& callable):
+            mTarget(target),
+            mStart(start),
+            mInterval(interval),
+            mCallable(std::move(callable))
+        {}
+
+        // Call by target WorkQueue -- note that although WE require a
+        // callable returning bool, WorkQueue wants a void callable. We
+        // consume the bool.
+        void operator()()
+        {
+            // If mCallable() throws an exception, don't catch it here: if it
+            // throws once, it's likely to throw every time, so it's a waste
+            // of time to arrange to call it again.
+            if (mCallable())
+            {
+                // Modify mStart to the new start time we desire. If we simply
+                // added mInterval to now, we'd get actual timings of
+                // (mInterval + slop), where 'slop' is the latency between the
+                // previous mStart and the WorkQueue actually calling us.
+                // Instead, add mInterval to mStart so that at least we
+                // register our intent to fire at exact mIntervals.
+                mStart += mInterval;
+
+                // We're being called at this moment by the target WorkQueue.
+                // Assume it still exists, rather than checking the result of
+                // lock().
+                // Resubmit the whole *this callable: that's why we're a class
+                // rather than a lambda. Allow moving *this so we can carry a
+                // move-only callable; but naturally this statement must be
+                // the last time we reference this instance, which may become
+                // moved-from.
+                try
+                {
+                    mTarget.lock()->post(mStart, std::move(*this));
+                }
+                catch (const Closed&)
+                {
+                    // Once this queue is closed, oh well, just stop
+                }
+            }
+        }
+
+    private:
+        WorkQueue::weak_t mTarget;
+        WorkQueue::TimePoint mStart;
+        std::chrono::duration<Rep, Period> mInterval;
+        CALLABLE mCallable;
+    };
+
+    template <typename Rep, typename Period, typename CALLABLE>
+    void WorkQueue::postEvery(const std::chrono::duration<Rep, Period>& interval,
+                              CALLABLE&& callable)
+    {
+        if (interval.count() <= 0)
+        {
+            // It's essential that postEvery() be called with a positive
+            // interval, since each call to BackJack posts another instance of
+            // itself at (start + interval) and we order by target time. A
+            // zero or negative interval would result in that BackJack
+            // instance going to the head of the queue every time, immediately
+            // ready to run. Effectively that would produce an infinite loop,
+            // a denial of service on this WorkQueue. (Message covers both cases.)
+            error("postEvery(interval) must be positive");
+        }
+        // Instantiate and post a suitable BackJack, binding a weak_ptr to
+        // self, the current time, the desired interval and the callable.
+        // NOTE(review): BackJack stores CALLABLE as deduced -- pass an rvalue.
+        post(
+            BackJack<Rep, Period, CALLABLE>(
+                 getWeak(), TimePoint::clock::now(), interval, std::move(callable)));
+    }
+
+} // namespace LL
+
+#endif /* ! defined(LL_WORKQUEUE_H) */
diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h
index 7ba347062f..5291a05607 100644
--- a/indra/llmath/llmatrix4a.h
+++ b/indra/llmath/llmatrix4a.h
@@ -36,6 +36,26 @@ class LLMatrix4a
 public:
 	LL_ALIGN_16(LLVector4a mMatrix[4]);
 
+    LLMatrix4a()
+    {
+        // nothing is assigned here: mMatrix is only default-constructed
+    }
+
+    explicit LLMatrix4a(const LLMatrix4& val)
+    {
+        loadu(val);
+    }
+
+    inline F32* getF32ptr()
+    {
+        return (F32*) &mMatrix;
+    }
+
+    inline const F32* getF32ptr() const
+    {
+        return (const F32*) &mMatrix; // cast must not strip const from a const matrix
+    }
+
 	inline void clear()
 	{
 		mMatrix[0].clear();
@@ -44,6 +64,14 @@ public:
 		mMatrix[3].clear();
 	}
 
+    inline void setIdentity()
+    {
+        mMatrix[0].set(1.f, 0.f, 0.f, 0.f);
+        mMatrix[1].set(0.f, 1.f, 0.f, 0.f);
+        mMatrix[2].set(0.f, 0.f, 1.f, 0.f);
+        mMatrix[3].set(0.f, 0.f, 0.f, 1.f);
+    }
+
 	inline void loadu(const LLMatrix4& src)
 	{
 		mMatrix[0] = _mm_loadu_ps(src.mMatrix[0]);
@@ -105,7 +133,7 @@ public:
 		mMatrix[3].setAdd(a.mMatrix[3],d3);
 	}
 
-	inline void rotate(const LLVector4a& v, LLVector4a& res)
+	inline void rotate(const LLVector4a& v, LLVector4a& res) const
 	{
 		LLVector4a y,z;
 
@@ -151,6 +179,8 @@ public:
     {
         affineTransformSSE(v,res);
     }
+
+    const LLVector4a& getTranslation() const { return mMatrix[3]; }
 };
 
 inline LLVector4a rowMul(const LLVector4a &row, const LLMatrix4a &mat)
@@ -176,6 +206,15 @@ inline void matMul(const LLMatrix4a &a, const LLMatrix4a &b, LLMatrix4a &res)
     res.mMatrix[3] = row3;
 }
 
+// Faster version of matMul where res must not alias a or b: rows of res are written while a and b are still being read
+inline void matMulUnsafe(const LLMatrix4a &a, const LLMatrix4a &b, LLMatrix4a &res)
+{
+    res.mMatrix[0] = rowMul(a.mMatrix[0], b);
+    res.mMatrix[1] = rowMul(a.mMatrix[1], b);
+    res.mMatrix[2] = rowMul(a.mMatrix[2], b);
+    res.mMatrix[3] = rowMul(a.mMatrix[3], b);
+}
+
 inline std::ostream& operator<<(std::ostream& s, const LLMatrix4a& m)
 {
     s << "[" << m.mMatrix[0] << ", " << m.mMatrix[1] << ", " << m.mMatrix[2] << ", " << m.mMatrix[3] << "]";
diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h
index 27abf39537..5a02928374 100644
--- a/indra/llmath/llvector4a.h
+++ b/indra/llmath/llvector4a.h
@@ -46,10 +46,9 @@ class LLRotation;
 // of this writing, July 08, 2010) about getting it implemented before you resort to
 // LLVector3/LLVector4. 
 /////////////////////////////////
-struct LLVector4a;
 
 LL_ALIGN_PREFIX(16)
-struct LLVector4a
+class LLVector4a
 {
 public:
 
@@ -138,10 +137,10 @@ public:
 	// BASIC GET/SET 
 	////////////////////////////////////
 	
-	// Return a "this" as an F32 pointer. Do not use unless you have a very good reason.  (Not sure? Ask Falcon)
+	// Return a "this" as an F32 pointer.
 	inline F32* getF32ptr();
 	
-	// Return a "this" as a const F32 pointer. Do not use unless you have a very good reason.  (Not sure? Ask Falcon)
+	// Return a "this" as a const F32 pointer.
 	inline const F32* const getF32ptr() const;
 	
 	// Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates
diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 69d3d01efe..8be1c1b114 100644
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -58,13 +58,13 @@ inline void LLVector4a::store4a(F32* dst) const
 // BASIC GET/SET 
 ////////////////////////////////////
 
-// Return a "this" as an F32 pointer. Do not use unless you have a very good reason.  (Not sure? Ask Falcon)
+// Return a "this" as an F32 pointer.
 F32* LLVector4a::getF32ptr()
 {
 	return (F32*) &mQ;
 }
 
-// Return a "this" as a const F32 pointer. Do not use unless you have a very good reason.  (Not sure? Ask Falcon)
+// Return a "this" as a const F32 pointer.
 const F32* const LLVector4a::getF32ptr() const
 {
 	return (const F32* const) &mQ;
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index e085fa6ada..130f30bedc 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -383,6 +383,7 @@ public:
 	virtual void visit(const LLOctreeNode<LLVolumeTriangle>* branch)
 	{ //this is a depth first traversal, so it's safe to assum all children have complete
 		//bounding data
+	LL_PROFILE_ZONE_SCOPED
 
 		LLVolumeOctreeListener* node = (LLVolumeOctreeListener*) branch->getListener(0);
 
@@ -822,6 +823,8 @@ S32 LLProfile::getNumPoints(const LLProfileParams& params, BOOL path_open,F32 de
 BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detail, S32 split,
 						 BOOL is_sculpted, S32 sculpt_size)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if ((!mDirty) && (!is_sculpted))
 	{
 		return FALSE;
@@ -1302,6 +1305,8 @@ S32 LLPath::getNumNGonPoints(const LLPathParams& params, S32 sides, F32 startOff
 
 void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 end_scale, F32 twist_scale)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	// Generates a circular path, starting at (1, 0, 0), counterclockwise along the xz plane.
 	static const F32 tableScale[] = { 1, 1, 1, 0.5f, 0.707107f, 0.53f, 0.525f, 0.5f };
 
@@ -1536,6 +1541,8 @@ S32 LLPath::getNumPoints(const LLPathParams& params, F32 detail)
 BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,
 					  BOOL is_sculpted, S32 sculpt_size)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if ((!mDirty) && (!is_sculpted))
 	{
 		return FALSE;
@@ -2112,6 +2119,8 @@ LLVolume::~LLVolume()
 
 BOOL LLVolume::generate()
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	LL_CHECK_MEMORY
 	llassert_always(mProfilep);
 	
@@ -2370,6 +2379,8 @@ bool LLVolumeFace::VertexData::compareNormal(const LLVolumeFace::VertexData& rhs
 
 bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	//input stream is now pointing at a zlib compressed block of LLSD
 	//decompress block
 	LLSD mdl;
@@ -2755,6 +2766,8 @@ S32	LLVolume::getNumFaces() const
 
 void LLVolume::createVolumeFaces()
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (mGenerateSingleFace)
 	{
 		// do nothing
@@ -3720,6 +3733,8 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 										  const LLMatrix3& norm_mat_in,
 										  S32 face_mask)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	LLMatrix4a mat;
 	mat.loadu(mat_in);
 
@@ -4846,6 +4861,8 @@ void LLVolumeFace::freeData()
 
 BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	//tree for this face is no longer valid
 	delete mOctree;
 	mOctree = NULL;
@@ -5514,6 +5531,8 @@ bool LLVolumeFace::cacheOptimize()
 
 void LLVolumeFace::createOctree(F32 scaler, const LLVector4a& center, const LLVector4a& size)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (mOctree)
 	{
 		return;
@@ -6287,6 +6306,8 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 
 void LLVolumeFace::createTangents()
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (!mTangents)
 	{
 		allocateTangents(mNumVertices);
@@ -6482,6 +6503,8 @@ void LLVolumeFace::fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v,
 
 BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	LL_CHECK_MEMORY
 	BOOL flat = mTypeMask & FLAT_MASK;
 
@@ -6974,6 +6997,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal,
         const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent)
 {
+	LL_PROFILE_ZONE_SCOPED
+
     //LLVector4a *tan1 = new LLVector4a[vertexCount * 2];
 	LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a));
 	// new(tan1) LLVector4a;
diff --git a/indra/llmath/m4math.cpp b/indra/llmath/m4math.cpp
index 3baf1bad18..6e40dae30b 100644
--- a/indra/llmath/m4math.cpp
+++ b/indra/llmath/m4math.cpp
@@ -32,8 +32,7 @@
 #include "m4math.h"
 #include "m3math.h"
 #include "llquaternion.h"
-
-
+#include "llmatrix4a.h"
 
 
 // LLMatrix4
@@ -115,6 +114,12 @@ LLMatrix4::LLMatrix4(const LLQuaternion &q)
 	*this = initRotation(q);
 }
 
+LLMatrix4::LLMatrix4(const LLMatrix4a& mat)
+    : LLMatrix4(mat.getF32ptr())
+{
+    // all work is done by the delegated LLMatrix4(const F32*) constructor
+}
+
 LLMatrix4::LLMatrix4(const LLQuaternion &q, const LLVector4 &pos)
 {
 	*this = initRotTrans(q, pos);
diff --git a/indra/llmath/m4math.h b/indra/llmath/m4math.h
index bf60adb9b6..b9da970cde 100644
--- a/indra/llmath/m4math.h
+++ b/indra/llmath/m4math.h
@@ -32,6 +32,7 @@
 class LLVector4;
 class LLMatrix3;
 class LLQuaternion;
+class LLMatrix4a;
 
 // NOTA BENE: Currently assuming a right-handed, x-forward, y-left, z-up universe
 
@@ -104,6 +105,7 @@ public:
 	explicit LLMatrix4(const F32 *mat);								// Initializes Matrix to values in mat
 	explicit LLMatrix4(const LLMatrix3 &mat);						// Initializes Matrix to values in mat and sets position to (0,0,0)
 	explicit LLMatrix4(const LLQuaternion &q);						// Initializes Matrix with rotation q and sets position to (0,0,0)
+    explicit LLMatrix4(const LLMatrix4a& mat);
 
 	LLMatrix4(const LLMatrix3 &mat, const LLVector4 &pos);	// Initializes Matrix to values in mat and pos
 
diff --git a/indra/llmath/v3math.cpp b/indra/llmath/v3math.cpp
index b04c67d926..93010d2250 100644
--- a/indra/llmath/v3math.cpp
+++ b/indra/llmath/v3math.cpp
@@ -316,6 +316,12 @@ LLVector3::LLVector3(const LLVector4 &vec)
 	mV[VZ] = (F32)vec.mV[VZ];
 }
 
+LLVector3::LLVector3(const LLVector4a& vec)
+    : LLVector3(vec.getF32ptr())
+{
+    // all work is done by the delegated constructor, fed from vec.getF32ptr()
+}
+
 LLVector3::LLVector3(const LLSD& sd)
 {
 	setValue(sd);
diff --git a/indra/llmath/v3math.h b/indra/llmath/v3math.h
index 6f857d7061..068f489020 100644
--- a/indra/llmath/v3math.h
+++ b/indra/llmath/v3math.h
@@ -33,6 +33,7 @@
 #include "llsd.h"
 class LLVector2;
 class LLVector4;
+class LLVector4a;
 class LLMatrix3;
 class LLMatrix4;
 class LLVector3d;
@@ -62,7 +63,9 @@ class LLVector3
 		explicit LLVector3(const LLVector2 &vec);				// Initializes LLVector3 to (vec[0]. vec[1], 0)
 		explicit LLVector3(const LLVector3d &vec);				// Initializes LLVector3 to (vec[0]. vec[1], vec[2])
 		explicit LLVector3(const LLVector4 &vec);				// Initializes LLVector4 to (vec[0]. vec[1], vec[2])
-		explicit LLVector3(const LLSD& sd);
+        explicit LLVector3(const LLVector4a& vec);              // Initializes LLVector3 to (vec[0], vec[1], vec[2])
+        explicit LLVector3(const LLSD& sd);
+        
 
 		LLSD getValue() const;
 
diff --git a/indra/llprimitive/lldaeloader.cpp b/indra/llprimitive/lldaeloader.cpp
index dfa29fb539..8343de0cbc 100644
--- a/indra/llprimitive/lldaeloader.cpp
+++ b/indra/llprimitive/lldaeloader.cpp
@@ -1173,17 +1173,19 @@ void LLDAELoader::processDomModel(LLModel* model, DAE* dae, daeElement* root, do
 
 			LLMeshSkinInfo& skin_info = model->mSkinInfo;
 
+            LLMatrix4 mat;
 			for (int i = 0; i < 4; i++)
 			{
 				for(int j = 0; j < 4; j++)
 				{
-					skin_info.mBindShapeMatrix.mMatrix[i][j] = dom_value[i + j*4];
+                    mat.mMatrix[i][j] = dom_value[i + j*4];
 				}
 			}
 
-			LLMatrix4 trans = normalized_transformation;
-			trans *= skin_info.mBindShapeMatrix;
-			skin_info.mBindShapeMatrix = trans;							
+            skin_info.mBindShapeMatrix.loadu(mat);
+
+			LLMatrix4a trans(normalized_transformation);
+            matMul(trans, skin_info.mBindShapeMatrix, skin_info.mBindShapeMatrix);
 		}
 
 
@@ -1401,7 +1403,7 @@ void LLDAELoader::processDomModel(LLModel* model, DAE* dae, daeElement* root, do
 									mat.mMatrix[i][j] = transform[k*16 + i + j*4];
 								}
 							}
-							model->mSkinInfo.mInvBindMatrix.push_back(mat);
+							model->mSkinInfo.mInvBindMatrix.push_back(LLMatrix4a(mat));
 						}
 					}
 				}
@@ -1475,9 +1477,9 @@ void LLDAELoader::processDomModel(LLModel* model, DAE* dae, daeElement* root, do
 			if (mJointMap.find(lookingForJoint) != mJointMap.end()
 				&& model->mSkinInfo.mInvBindMatrix.size() > i)
 			{
-				LLMatrix4 newInverse = model->mSkinInfo.mInvBindMatrix[i];
+				LLMatrix4 newInverse = LLMatrix4(model->mSkinInfo.mInvBindMatrix[i].getF32ptr());
 				newInverse.setTranslation( mJointList[lookingForJoint].getTranslation() );
-				model->mSkinInfo.mAlternateBindMatrix.push_back( newInverse );
+				model->mSkinInfo.mAlternateBindMatrix.push_back( LLMatrix4a(newInverse) );
             }
 			else
 			{
diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp
index 702a1b5238..a23b991f1d 100644
--- a/indra/llprimitive/llmodel.cpp
+++ b/indra/llprimitive/llmodel.cpp
@@ -1396,7 +1396,7 @@ void LLMeshSkinInfo::fromLLSD(LLSD& skin)
 				}
 			}
 
-			mInvBindMatrix.push_back(mat);
+			mInvBindMatrix.push_back(LLMatrix4a(mat));
 		}
 
         if (mJointNames.size() != mInvBindMatrix.size())
@@ -1410,13 +1410,15 @@ void LLMeshSkinInfo::fromLLSD(LLSD& skin)
 
 	if (skin.has("bind_shape_matrix"))
 	{
+        LLMatrix4 mat;
 		for (U32 j = 0; j < 4; j++)
 		{
 			for (U32 k = 0; k < 4; k++)
 			{
-				mBindShapeMatrix.mMatrix[j][k] = skin["bind_shape_matrix"][j*4+k].asReal();
+				mat.mMatrix[j][k] = skin["bind_shape_matrix"][j*4+k].asReal();
 			}
 		}
+        mBindShapeMatrix.loadu(mat);
 	}
 
 	if (skin.has("alt_inverse_bind_matrix"))
@@ -1432,7 +1434,7 @@ void LLMeshSkinInfo::fromLLSD(LLSD& skin)
 				}
 			}
 			
-			mAlternateBindMatrix.push_back(mat);
+			mAlternateBindMatrix.push_back(LLMatrix4a(mat));
 		}
 	}
 
diff --git a/indra/llprimitive/llmodel.h b/indra/llprimitive/llmodel.h
index 51fa2f8079..96d4582b4f 100644
--- a/indra/llprimitive/llmodel.h
+++ b/indra/llprimitive/llmodel.h
@@ -33,6 +33,8 @@
 #include "m4math.h"
 #include <queue>
 
+#include <boost/align/aligned_allocator.hpp>
+
 class daeElement;
 class domMesh;
 
@@ -49,10 +51,11 @@ public:
 	LLUUID mMeshID;
 	std::vector<std::string> mJointNames;
     mutable std::vector<S32> mJointNums;
-	std::vector<LLMatrix4> mInvBindMatrix;
-	std::vector<LLMatrix4> mAlternateBindMatrix;
+    typedef std::vector<LLMatrix4a, boost::alignment::aligned_allocator<LLMatrix4a, 16>> matrix_list_t;
+	matrix_list_t mInvBindMatrix;
+	matrix_list_t mAlternateBindMatrix;
 
-	LLMatrix4 mBindShapeMatrix;
+	LLMatrix4a mBindShapeMatrix;
 	float mPelvisOffset;
     bool mLockScaleIfJointPosition;
     bool mInvalidJointsScrubbed;
diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp
index 43fedeca64..673f6cb6df 100644
--- a/indra/llrender/llgl.cpp
+++ b/indra/llrender/llgl.cpp
@@ -434,9 +434,6 @@ LLGLManager::LLGLManager() :
 	mHasMapBufferRange(FALSE),
 	mHasFlushBufferRange(FALSE),
 	mHasPBuffer(FALSE),
-	mHasShaderObjects(FALSE),
-	mHasVertexShader(FALSE),
-	mHasFragmentShader(FALSE),
 	mNumTextureImageUnits(0),
 	mHasOcclusionQuery(FALSE),
 	mHasTimerQuery(FALSE),
@@ -775,14 +772,9 @@ bool LLGLManager::initGL()
 
 	stop_glerror();
 
-	stop_glerror();
-
-	if (mHasFragmentShader)
-	{
-		GLint num_tex_image_units;
-		glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS_ARB, &num_tex_image_units);
-		mNumTextureImageUnits = llmin(num_tex_image_units, 32);
-	}
+	GLint num_tex_image_units = 0; // glGetIntegerv leaves its output untouched on error; start from 0 so a failed query never clamps an indeterminate value
+	glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS_ARB, &num_tex_image_units);
+	mNumTextureImageUnits = llmin(num_tex_image_units, 32); // cap the reported unit count at 32
 
 	if (LLRender::sGLCoreProfile)
 	{
@@ -975,9 +967,9 @@ void LLGLManager::asLLSD(LLSD& info)
 	info["has_map_buffer_range"] = mHasMapBufferRange;
 	info["has_flush_buffer_range"] = mHasFlushBufferRange;
 	info["has_pbuffer"] = mHasPBuffer;
-	info["has_shader_objects"] = mHasShaderObjects;
-	info["has_vertex_shader"] = mHasVertexShader;
-	info["has_fragment_shader"] = mHasFragmentShader;
+    info["has_shader_objects"] = std::string("Assumed TRUE");   // was mHasShaderObjects;
+	info["has_vertex_shader"] = std::string("Assumed TRUE");    // was mHasVertexShader;
+	info["has_fragment_shader"] = std::string("Assumed TRUE");  // was mHasFragmentShader;
 	info["num_texture_image_units"] =  mNumTextureImageUnits;
 	info["has_occlusion_query"] = mHasOcclusionQuery;
 	info["has_timer_query"] = mHasTimerQuery;
@@ -1083,9 +1075,6 @@ void LLGLManager::initExtensions()
 	mHasCubeMap = FALSE;
 	mHasOcclusionQuery = FALSE;
 	mHasPointParameters = FALSE;
-	mHasShaderObjects = FALSE;
-	mHasVertexShader = FALSE;
-	mHasFragmentShader = FALSE;
 	mHasTextureRectangle = FALSE;
 #else // LL_MESA_HEADLESS //important, gGLHExts.mSysExts is uninitialized until after glh_init_extensions is called
 	mHasMultitexture = glh_init_extensions("GL_ARB_multitexture");
@@ -1143,10 +1132,6 @@ void LLGLManager::initExtensions()
 #if !LL_DARWIN
 	mHasPointParameters = !mIsATI && ExtensionExists("GL_ARB_point_parameters", gGLHExts.mSysExts);
 #endif
-	mHasShaderObjects = ExtensionExists("GL_ARB_shader_objects", gGLHExts.mSysExts) && (LLRender::sGLCoreProfile || ExtensionExists("GL_ARB_shading_language_100", gGLHExts.mSysExts));
-	mHasVertexShader = ExtensionExists("GL_ARB_vertex_program", gGLHExts.mSysExts) && ExtensionExists("GL_ARB_vertex_shader", gGLHExts.mSysExts)
-		&& (LLRender::sGLCoreProfile || ExtensionExists("GL_ARB_shading_language_100", gGLHExts.mSysExts));
-	mHasFragmentShader = ExtensionExists("GL_ARB_fragment_shader", gGLHExts.mSysExts) && (LLRender::sGLCoreProfile || ExtensionExists("GL_ARB_shading_language_100", gGLHExts.mSysExts));
 #endif
 
 #if LL_LINUX
@@ -1169,9 +1154,6 @@ void LLGLManager::initExtensions()
 		mHasCubeMap = FALSE;
 		mHasOcclusionQuery = FALSE;
 		mHasPointParameters = FALSE;
-		mHasShaderObjects = FALSE;
-		mHasVertexShader = FALSE;
-		mHasFragmentShader = FALSE;
 		LL_WARNS("RenderInit") << "GL extension support DISABLED via LL_GL_NOEXT" << LL_ENDL;
 	}
 	else if (getenv("LL_GL_BASICEXT"))	/* Flawfinder: ignore */
@@ -1184,9 +1166,6 @@ void LLGLManager::initExtensions()
 		mHasAnisotropic = FALSE;
 		//mHasCubeMap = FALSE; // apparently fatal on Intel 915 & similar
 		//mHasOcclusionQuery = FALSE; // source of many ATI system hangs
-		mHasShaderObjects = FALSE;
-		mHasVertexShader = FALSE;
-		mHasFragmentShader = FALSE;
 		mHasBlendFuncSeparate = FALSE;
 		LL_WARNS("RenderInit") << "GL extension support forced to SIMPLE level via LL_GL_BASICEXT" << LL_ENDL;
 	}
@@ -1208,9 +1187,6 @@ void LLGLManager::initExtensions()
 		if (strchr(blacklist,'j')) mHasCubeMap = FALSE;//S
 // 		if (strchr(blacklist,'k')) mHasATIVAO = FALSE;//S
 		if (strchr(blacklist,'l')) mHasOcclusionQuery = FALSE;
-		if (strchr(blacklist,'m')) mHasShaderObjects = FALSE;//S
-		if (strchr(blacklist,'n')) mHasVertexShader = FALSE;//S
-		if (strchr(blacklist,'o')) mHasFragmentShader = FALSE;//S
 		if (strchr(blacklist,'p')) mHasPointParameters = FALSE;//S
 		if (strchr(blacklist,'q')) mHasFramebufferObject = FALSE;//S
 		if (strchr(blacklist,'r')) mHasDrawBuffers = FALSE;//S
@@ -1257,18 +1233,6 @@ void LLGLManager::initExtensions()
 	{
 		LL_INFOS("RenderInit") << "Couldn't initialize GL_ARB_point_parameters" << LL_ENDL;
 	}
-	if (!mHasShaderObjects)
-	{
-		LL_INFOS("RenderInit") << "Couldn't initialize GL_ARB_shader_objects" << LL_ENDL;
-	}
-	if (!mHasVertexShader)
-	{
-		LL_INFOS("RenderInit") << "Couldn't initialize GL_ARB_vertex_shader" << LL_ENDL;
-	}
-	if (!mHasFragmentShader)
-	{
-		LL_INFOS("RenderInit") << "Couldn't initialize GL_ARB_fragment_shader" << LL_ENDL;
-	}
 	if (!mHasBlendFuncSeparate)
 	{
 		LL_INFOS("RenderInit") << "Couldn't initialize GL_EXT_blend_func_separate" << LL_ENDL;
@@ -1436,134 +1400,132 @@ void LLGLManager::initExtensions()
 		glPointParameterfARB = (PFNGLPOINTPARAMETERFARBPROC)GLH_EXT_GET_PROC_ADDRESS("glPointParameterfARB");
 		glPointParameterfvARB = (PFNGLPOINTPARAMETERFVARBPROC)GLH_EXT_GET_PROC_ADDRESS("glPointParameterfvARB");
 	}
-	if (mHasShaderObjects)
-	{
-		glDeleteObjectARB = (PFNGLDELETEOBJECTARBPROC) GLH_EXT_GET_PROC_ADDRESS("glDeleteObjectARB");
-		glGetHandleARB = (PFNGLGETHANDLEARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetHandleARB");
-		glDetachObjectARB = (PFNGLDETACHOBJECTARBPROC) GLH_EXT_GET_PROC_ADDRESS("glDetachObjectARB");
-		glCreateShaderObjectARB = (PFNGLCREATESHADEROBJECTARBPROC) GLH_EXT_GET_PROC_ADDRESS("glCreateShaderObjectARB");
-		glShaderSourceARB = (PFNGLSHADERSOURCEARBPROC) GLH_EXT_GET_PROC_ADDRESS("glShaderSourceARB");
-		glCompileShaderARB = (PFNGLCOMPILESHADERARBPROC) GLH_EXT_GET_PROC_ADDRESS("glCompileShaderARB");
-		glCreateProgramObjectARB = (PFNGLCREATEPROGRAMOBJECTARBPROC) GLH_EXT_GET_PROC_ADDRESS("glCreateProgramObjectARB");
-		glAttachObjectARB = (PFNGLATTACHOBJECTARBPROC) GLH_EXT_GET_PROC_ADDRESS("glAttachObjectARB");
-		glLinkProgramARB = (PFNGLLINKPROGRAMARBPROC) GLH_EXT_GET_PROC_ADDRESS("glLinkProgramARB");
-		glUseProgramObjectARB = (PFNGLUSEPROGRAMOBJECTARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUseProgramObjectARB");
-		glValidateProgramARB = (PFNGLVALIDATEPROGRAMARBPROC) GLH_EXT_GET_PROC_ADDRESS("glValidateProgramARB");
-		glUniform1fARB = (PFNGLUNIFORM1FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform1fARB");
-		glUniform2fARB = (PFNGLUNIFORM2FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform2fARB");
-		glUniform3fARB = (PFNGLUNIFORM3FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform3fARB");
-		glUniform4fARB = (PFNGLUNIFORM4FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform4fARB");
-		glUniform1iARB = (PFNGLUNIFORM1IARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform1iARB");
-		glUniform2iARB = (PFNGLUNIFORM2IARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform2iARB");
-		glUniform3iARB = (PFNGLUNIFORM3IARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform3iARB");
-		glUniform4iARB = (PFNGLUNIFORM4IARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform4iARB");
-		glUniform1fvARB = (PFNGLUNIFORM1FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform1fvARB");
-		glUniform2fvARB = (PFNGLUNIFORM2FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform2fvARB");
-		glUniform3fvARB = (PFNGLUNIFORM3FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform3fvARB");
-		glUniform4fvARB = (PFNGLUNIFORM4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform4fvARB");
-		glUniform1ivARB = (PFNGLUNIFORM1IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform1ivARB");
-		glUniform2ivARB = (PFNGLUNIFORM2IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform2ivARB");
-		glUniform3ivARB = (PFNGLUNIFORM3IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform3ivARB");
-		glUniform4ivARB = (PFNGLUNIFORM4IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform4ivARB");
-		glUniformMatrix2fvARB = (PFNGLUNIFORMMATRIX2FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix2fvARB");
-		glUniformMatrix3fvARB = (PFNGLUNIFORMMATRIX3FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix3fvARB");
-		glUniformMatrix3x4fv = (PFNGLUNIFORMMATRIX3X4FVPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix3x4fv");
-		glUniformMatrix4fvARB = (PFNGLUNIFORMMATRIX4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix4fvARB");
-		glGetObjectParameterfvARB = (PFNGLGETOBJECTPARAMETERFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetObjectParameterfvARB");
-		glGetObjectParameterivARB = (PFNGLGETOBJECTPARAMETERIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetObjectParameterivARB");
-		glGetInfoLogARB = (PFNGLGETINFOLOGARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetInfoLogARB");
-		glGetAttachedObjectsARB = (PFNGLGETATTACHEDOBJECTSARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetAttachedObjectsARB");
-		glGetUniformLocationARB = (PFNGLGETUNIFORMLOCATIONARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetUniformLocationARB");
-		glGetActiveUniformARB = (PFNGLGETACTIVEUNIFORMARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetActiveUniformARB");
-		glGetUniformfvARB = (PFNGLGETUNIFORMFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetUniformfvARB");
-		glGetUniformivARB = (PFNGLGETUNIFORMIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetUniformivARB");
-		glGetShaderSourceARB = (PFNGLGETSHADERSOURCEARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetShaderSourceARB");
-	}
-	if (mHasVertexShader)
-	{
-		LL_INFOS() << "initExtensions() VertexShader-related procs..." << LL_ENDL;
-
-        // nSight doesn't support use of ARB funcs that have been normalized in the API
-        if (!LLRender::sNsightDebugSupport)
-        {
-		glGetAttribLocationARB = (PFNGLGETATTRIBLOCATIONARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetAttribLocationARB");
-		glBindAttribLocationARB = (PFNGLBINDATTRIBLOCATIONARBPROC) GLH_EXT_GET_PROC_ADDRESS("glBindAttribLocationARB");
-        }
-        else
-        {
-            glGetAttribLocationARB = (PFNGLGETATTRIBLOCATIONARBPROC)GLH_EXT_GET_PROC_ADDRESS("glGetAttribLocation");
-            glBindAttribLocationARB = (PFNGLBINDATTRIBLOCATIONARBPROC)GLH_EXT_GET_PROC_ADDRESS("glBindAttribLocation");
-        }
-
-		glGetActiveAttribARB = (PFNGLGETACTIVEATTRIBARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetActiveAttribARB");
-		glVertexAttrib1dARB = (PFNGLVERTEXATTRIB1DARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib1dARB");
-		glVertexAttrib1dvARB = (PFNGLVERTEXATTRIB1DVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib1dvARB");
-		glVertexAttrib1fARB = (PFNGLVERTEXATTRIB1FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib1fARB");
-		glVertexAttrib1fvARB = (PFNGLVERTEXATTRIB1FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib1fvARB");
-		glVertexAttrib1sARB = (PFNGLVERTEXATTRIB1SARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib1sARB");
-		glVertexAttrib1svARB = (PFNGLVERTEXATTRIB1SVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib1svARB");
-		glVertexAttrib2dARB = (PFNGLVERTEXATTRIB2DARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib2dARB");
-		glVertexAttrib2dvARB = (PFNGLVERTEXATTRIB2DVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib2dvARB");
-		glVertexAttrib2fARB = (PFNGLVERTEXATTRIB2FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib2fARB");
-		glVertexAttrib2fvARB = (PFNGLVERTEXATTRIB2FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib2fvARB");
-		glVertexAttrib2sARB = (PFNGLVERTEXATTRIB2SARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib2sARB");
-		glVertexAttrib2svARB = (PFNGLVERTEXATTRIB2SVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib2svARB");
-		glVertexAttrib3dARB = (PFNGLVERTEXATTRIB3DARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib3dARB");
-		glVertexAttrib3dvARB = (PFNGLVERTEXATTRIB3DVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib3dvARB");
-		glVertexAttrib3fARB = (PFNGLVERTEXATTRIB3FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib3fARB");
-		glVertexAttrib3fvARB = (PFNGLVERTEXATTRIB3FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib3fvARB");
-		glVertexAttrib3sARB = (PFNGLVERTEXATTRIB3SARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib3sARB");
-		glVertexAttrib3svARB = (PFNGLVERTEXATTRIB3SVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib3svARB");
-		glVertexAttrib4nbvARB = (PFNGLVERTEXATTRIB4NBVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nbvARB");
-		glVertexAttrib4nivARB = (PFNGLVERTEXATTRIB4NIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nivARB");
-		glVertexAttrib4nsvARB = (PFNGLVERTEXATTRIB4NSVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nsvARB");
-		glVertexAttrib4nubARB = (PFNGLVERTEXATTRIB4NUBARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nubARB");
-		glVertexAttrib4nubvARB = (PFNGLVERTEXATTRIB4NUBVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nubvARB");
-		glVertexAttrib4nuivARB = (PFNGLVERTEXATTRIB4NUIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nuivARB");
-		glVertexAttrib4nusvARB = (PFNGLVERTEXATTRIB4NUSVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nusvARB");
-		glVertexAttrib4bvARB = (PFNGLVERTEXATTRIB4BVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4bvARB");
-		glVertexAttrib4dARB = (PFNGLVERTEXATTRIB4DARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4dARB");
-		glVertexAttrib4dvARB = (PFNGLVERTEXATTRIB4DVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4dvARB");
-		glVertexAttrib4fARB = (PFNGLVERTEXATTRIB4FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4fARB");
-		glVertexAttrib4fvARB = (PFNGLVERTEXATTRIB4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4fvARB");
-		glVertexAttrib4ivARB = (PFNGLVERTEXATTRIB4IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4ivARB");
-		glVertexAttrib4sARB = (PFNGLVERTEXATTRIB4SARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4sARB");
-		glVertexAttrib4svARB = (PFNGLVERTEXATTRIB4SVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4svARB");
-		glVertexAttrib4ubvARB = (PFNGLVERTEXATTRIB4UBVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4ubvARB");
-		glVertexAttrib4uivARB = (PFNGLVERTEXATTRIB4UIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4uivARB");
-		glVertexAttrib4usvARB = (PFNGLVERTEXATTRIB4USVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4usvARB");
-		glVertexAttribPointerARB = (PFNGLVERTEXATTRIBPOINTERARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttribPointerARB");
-		glVertexAttribIPointer = (PFNGLVERTEXATTRIBIPOINTERPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttribIPointer");
-		glEnableVertexAttribArrayARB = (PFNGLENABLEVERTEXATTRIBARRAYARBPROC) GLH_EXT_GET_PROC_ADDRESS("glEnableVertexAttribArrayARB");
-		glDisableVertexAttribArrayARB = (PFNGLDISABLEVERTEXATTRIBARRAYARBPROC) GLH_EXT_GET_PROC_ADDRESS("glDisableVertexAttribArrayARB");
-		glProgramStringARB = (PFNGLPROGRAMSTRINGARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramStringARB");
-		glBindProgramARB = (PFNGLBINDPROGRAMARBPROC) GLH_EXT_GET_PROC_ADDRESS("glBindProgramARB");
-		glDeleteProgramsARB = (PFNGLDELETEPROGRAMSARBPROC) GLH_EXT_GET_PROC_ADDRESS("glDeleteProgramsARB");
-		glGenProgramsARB = (PFNGLGENPROGRAMSARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGenProgramsARB");
-		glProgramEnvParameter4dARB = (PFNGLPROGRAMENVPARAMETER4DARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramEnvParameter4dARB");
-		glProgramEnvParameter4dvARB = (PFNGLPROGRAMENVPARAMETER4DVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramEnvParameter4dvARB");
-		glProgramEnvParameter4fARB = (PFNGLPROGRAMENVPARAMETER4FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramEnvParameter4fARB");
-		glProgramEnvParameter4fvARB = (PFNGLPROGRAMENVPARAMETER4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramEnvParameter4fvARB");
-		glProgramLocalParameter4dARB = (PFNGLPROGRAMLOCALPARAMETER4DARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramLocalParameter4dARB");
-		glProgramLocalParameter4dvARB = (PFNGLPROGRAMLOCALPARAMETER4DVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramLocalParameter4dvARB");
-		glProgramLocalParameter4fARB = (PFNGLPROGRAMLOCALPARAMETER4FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramLocalParameter4fARB");
-		glProgramLocalParameter4fvARB = (PFNGLPROGRAMLOCALPARAMETER4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramLocalParameter4fvARB");
-		glGetProgramEnvParameterdvARB = (PFNGLGETPROGRAMENVPARAMETERDVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetProgramEnvParameterdvARB");
-		glGetProgramEnvParameterfvARB = (PFNGLGETPROGRAMENVPARAMETERFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetProgramEnvParameterfvARB");
-		glGetProgramLocalParameterdvARB = (PFNGLGETPROGRAMLOCALPARAMETERDVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetProgramLocalParameterdvARB");
-		glGetProgramLocalParameterfvARB = (PFNGLGETPROGRAMLOCALPARAMETERFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetProgramLocalParameterfvARB");
-		glGetProgramivARB = (PFNGLGETPROGRAMIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetProgramivARB");
-		glGetProgramStringARB = (PFNGLGETPROGRAMSTRINGARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetProgramStringARB");
-		glGetVertexAttribdvARB = (PFNGLGETVERTEXATTRIBDVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetVertexAttribdvARB");
-		glGetVertexAttribfvARB = (PFNGLGETVERTEXATTRIBFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetVertexAttribfvARB");
-		glGetVertexAttribivARB = (PFNGLGETVERTEXATTRIBIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetVertexAttribivARB");
-		glGetVertexAttribPointervARB = (PFNGLGETVERTEXATTRIBPOINTERVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glgetVertexAttribPointervARB");
-		glIsProgramARB = (PFNGLISPROGRAMARBPROC) GLH_EXT_GET_PROC_ADDRESS("glIsProgramARB");
-	}
-	LL_DEBUGS("RenderInit") << "GL Probe: Got symbols" << LL_ENDL;
+
+    // Assume shader capabilities
+    glDeleteObjectARB         = (PFNGLDELETEOBJECTARBPROC) GLH_EXT_GET_PROC_ADDRESS("glDeleteObjectARB");
+    glGetHandleARB            = (PFNGLGETHANDLEARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetHandleARB");
+    glDetachObjectARB         = (PFNGLDETACHOBJECTARBPROC) GLH_EXT_GET_PROC_ADDRESS("glDetachObjectARB");
+    glCreateShaderObjectARB   = (PFNGLCREATESHADEROBJECTARBPROC) GLH_EXT_GET_PROC_ADDRESS("glCreateShaderObjectARB");
+    glShaderSourceARB         = (PFNGLSHADERSOURCEARBPROC) GLH_EXT_GET_PROC_ADDRESS("glShaderSourceARB");
+    glCompileShaderARB        = (PFNGLCOMPILESHADERARBPROC) GLH_EXT_GET_PROC_ADDRESS("glCompileShaderARB");
+    glCreateProgramObjectARB  = (PFNGLCREATEPROGRAMOBJECTARBPROC) GLH_EXT_GET_PROC_ADDRESS("glCreateProgramObjectARB");
+    glAttachObjectARB         = (PFNGLATTACHOBJECTARBPROC) GLH_EXT_GET_PROC_ADDRESS("glAttachObjectARB");
+    glLinkProgramARB          = (PFNGLLINKPROGRAMARBPROC) GLH_EXT_GET_PROC_ADDRESS("glLinkProgramARB");
+    glUseProgramObjectARB     = (PFNGLUSEPROGRAMOBJECTARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUseProgramObjectARB");
+    glValidateProgramARB      = (PFNGLVALIDATEPROGRAMARBPROC) GLH_EXT_GET_PROC_ADDRESS("glValidateProgramARB");
+    glUniform1fARB            = (PFNGLUNIFORM1FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform1fARB");
+    glUniform2fARB            = (PFNGLUNIFORM2FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform2fARB");
+    glUniform3fARB            = (PFNGLUNIFORM3FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform3fARB");
+    glUniform4fARB            = (PFNGLUNIFORM4FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform4fARB");
+    glUniform1iARB            = (PFNGLUNIFORM1IARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform1iARB");
+    glUniform2iARB            = (PFNGLUNIFORM2IARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform2iARB");
+    glUniform3iARB            = (PFNGLUNIFORM3IARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform3iARB");
+    glUniform4iARB            = (PFNGLUNIFORM4IARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform4iARB");
+    glUniform1fvARB           = (PFNGLUNIFORM1FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform1fvARB");
+    glUniform2fvARB           = (PFNGLUNIFORM2FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform2fvARB");
+    glUniform3fvARB           = (PFNGLUNIFORM3FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform3fvARB");
+    glUniform4fvARB           = (PFNGLUNIFORM4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform4fvARB");
+    glUniform1ivARB           = (PFNGLUNIFORM1IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform1ivARB");
+    glUniform2ivARB           = (PFNGLUNIFORM2IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform2ivARB");
+    glUniform3ivARB           = (PFNGLUNIFORM3IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform3ivARB");
+    glUniform4ivARB           = (PFNGLUNIFORM4IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniform4ivARB");
+    glUniformMatrix2fvARB     = (PFNGLUNIFORMMATRIX2FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix2fvARB");
+    glUniformMatrix3fvARB     = (PFNGLUNIFORMMATRIX3FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix3fvARB");
+    glUniformMatrix3x4fv      = (PFNGLUNIFORMMATRIX3X4FVPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix3x4fv");
+    glUniformMatrix4fvARB     = (PFNGLUNIFORMMATRIX4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glUniformMatrix4fvARB");
+    glGetObjectParameterfvARB = (PFNGLGETOBJECTPARAMETERFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetObjectParameterfvARB");
+    glGetObjectParameterivARB = (PFNGLGETOBJECTPARAMETERIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetObjectParameterivARB");
+    glGetInfoLogARB           = (PFNGLGETINFOLOGARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetInfoLogARB");
+    glGetAttachedObjectsARB   = (PFNGLGETATTACHEDOBJECTSARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetAttachedObjectsARB");
+    glGetUniformLocationARB   = (PFNGLGETUNIFORMLOCATIONARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetUniformLocationARB");
+    glGetActiveUniformARB     = (PFNGLGETACTIVEUNIFORMARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetActiveUniformARB");
+    glGetUniformfvARB         = (PFNGLGETUNIFORMFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetUniformfvARB");
+    glGetUniformivARB         = (PFNGLGETUNIFORMIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetUniformivARB");
+    glGetShaderSourceARB      = (PFNGLGETSHADERSOURCEARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetShaderSourceARB");
+
+    LL_INFOS() << "initExtensions() VertexShader-related procs..." << LL_ENDL;
+
+    // nSight doesn't support use of ARB funcs that have been normalized in the API
+    if (!LLRender::sNsightDebugSupport)
+    {
+        glGetAttribLocationARB  = (PFNGLGETATTRIBLOCATIONARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetAttribLocationARB");
+        glBindAttribLocationARB = (PFNGLBINDATTRIBLOCATIONARBPROC) GLH_EXT_GET_PROC_ADDRESS("glBindAttribLocationARB");
+    }
+    else
+    {
+        glGetAttribLocationARB  = (PFNGLGETATTRIBLOCATIONARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetAttribLocation");
+        glBindAttribLocationARB = (PFNGLBINDATTRIBLOCATIONARBPROC) GLH_EXT_GET_PROC_ADDRESS("glBindAttribLocation");
+    }
+
+    glGetActiveAttribARB            = (PFNGLGETACTIVEATTRIBARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetActiveAttribARB");
+    glVertexAttrib1dARB             = (PFNGLVERTEXATTRIB1DARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib1dARB");
+    glVertexAttrib1dvARB            = (PFNGLVERTEXATTRIB1DVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib1dvARB");
+    glVertexAttrib1fARB             = (PFNGLVERTEXATTRIB1FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib1fARB");
+    glVertexAttrib1fvARB            = (PFNGLVERTEXATTRIB1FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib1fvARB");
+    glVertexAttrib1sARB             = (PFNGLVERTEXATTRIB1SARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib1sARB");
+    glVertexAttrib1svARB            = (PFNGLVERTEXATTRIB1SVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib1svARB");
+    glVertexAttrib2dARB             = (PFNGLVERTEXATTRIB2DARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib2dARB");
+    glVertexAttrib2dvARB            = (PFNGLVERTEXATTRIB2DVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib2dvARB");
+    glVertexAttrib2fARB             = (PFNGLVERTEXATTRIB2FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib2fARB");
+    glVertexAttrib2fvARB            = (PFNGLVERTEXATTRIB2FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib2fvARB");
+    glVertexAttrib2sARB             = (PFNGLVERTEXATTRIB2SARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib2sARB");
+    glVertexAttrib2svARB            = (PFNGLVERTEXATTRIB2SVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib2svARB");
+    glVertexAttrib3dARB             = (PFNGLVERTEXATTRIB3DARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib3dARB");
+    glVertexAttrib3dvARB            = (PFNGLVERTEXATTRIB3DVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib3dvARB");
+    glVertexAttrib3fARB             = (PFNGLVERTEXATTRIB3FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib3fARB");
+    glVertexAttrib3fvARB            = (PFNGLVERTEXATTRIB3FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib3fvARB");
+    glVertexAttrib3sARB             = (PFNGLVERTEXATTRIB3SARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib3sARB");
+    glVertexAttrib3svARB            = (PFNGLVERTEXATTRIB3SVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib3svARB");
+    glVertexAttrib4nbvARB           = (PFNGLVERTEXATTRIB4NBVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nbvARB");
+    glVertexAttrib4nivARB           = (PFNGLVERTEXATTRIB4NIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nivARB");
+    glVertexAttrib4nsvARB           = (PFNGLVERTEXATTRIB4NSVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nsvARB");
+    glVertexAttrib4nubARB           = (PFNGLVERTEXATTRIB4NUBARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nubARB");
+    glVertexAttrib4nubvARB          = (PFNGLVERTEXATTRIB4NUBVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nubvARB");
+    glVertexAttrib4nuivARB          = (PFNGLVERTEXATTRIB4NUIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nuivARB");
+    glVertexAttrib4nusvARB          = (PFNGLVERTEXATTRIB4NUSVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4nusvARB");
+    glVertexAttrib4bvARB            = (PFNGLVERTEXATTRIB4BVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4bvARB");
+    glVertexAttrib4dARB             = (PFNGLVERTEXATTRIB4DARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4dARB");
+    glVertexAttrib4dvARB            = (PFNGLVERTEXATTRIB4DVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4dvARB");
+    glVertexAttrib4fARB             = (PFNGLVERTEXATTRIB4FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4fARB");
+    glVertexAttrib4fvARB            = (PFNGLVERTEXATTRIB4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4fvARB");
+    glVertexAttrib4ivARB            = (PFNGLVERTEXATTRIB4IVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4ivARB");
+    glVertexAttrib4sARB             = (PFNGLVERTEXATTRIB4SARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4sARB");
+    glVertexAttrib4svARB            = (PFNGLVERTEXATTRIB4SVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4svARB");
+    glVertexAttrib4ubvARB           = (PFNGLVERTEXATTRIB4UBVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4ubvARB");
+    glVertexAttrib4uivARB           = (PFNGLVERTEXATTRIB4UIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4uivARB");
+    glVertexAttrib4usvARB           = (PFNGLVERTEXATTRIB4USVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttrib4usvARB");
+    glVertexAttribPointerARB        = (PFNGLVERTEXATTRIBPOINTERARBPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttribPointerARB");
+    glVertexAttribIPointer          = (PFNGLVERTEXATTRIBIPOINTERPROC) GLH_EXT_GET_PROC_ADDRESS("glVertexAttribIPointer");
+    glEnableVertexAttribArrayARB    = (PFNGLENABLEVERTEXATTRIBARRAYARBPROC) GLH_EXT_GET_PROC_ADDRESS("glEnableVertexAttribArrayARB");
+    glDisableVertexAttribArrayARB   = (PFNGLDISABLEVERTEXATTRIBARRAYARBPROC) GLH_EXT_GET_PROC_ADDRESS("glDisableVertexAttribArrayARB");
+    glProgramStringARB              = (PFNGLPROGRAMSTRINGARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramStringARB");
+    glBindProgramARB                = (PFNGLBINDPROGRAMARBPROC) GLH_EXT_GET_PROC_ADDRESS("glBindProgramARB");
+    glDeleteProgramsARB             = (PFNGLDELETEPROGRAMSARBPROC) GLH_EXT_GET_PROC_ADDRESS("glDeleteProgramsARB");
+    glGenProgramsARB                = (PFNGLGENPROGRAMSARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGenProgramsARB");
+    glProgramEnvParameter4dARB      = (PFNGLPROGRAMENVPARAMETER4DARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramEnvParameter4dARB");
+    glProgramEnvParameter4dvARB     = (PFNGLPROGRAMENVPARAMETER4DVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramEnvParameter4dvARB");
+    glProgramEnvParameter4fARB      = (PFNGLPROGRAMENVPARAMETER4FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramEnvParameter4fARB");
+    glProgramEnvParameter4fvARB     = (PFNGLPROGRAMENVPARAMETER4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramEnvParameter4fvARB");
+    glProgramLocalParameter4dARB    = (PFNGLPROGRAMLOCALPARAMETER4DARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramLocalParameter4dARB");
+    glProgramLocalParameter4dvARB   = (PFNGLPROGRAMLOCALPARAMETER4DVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramLocalParameter4dvARB");
+    glProgramLocalParameter4fARB    = (PFNGLPROGRAMLOCALPARAMETER4FARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramLocalParameter4fARB");
+    glProgramLocalParameter4fvARB   = (PFNGLPROGRAMLOCALPARAMETER4FVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glProgramLocalParameter4fvARB");
+    glGetProgramEnvParameterdvARB   = (PFNGLGETPROGRAMENVPARAMETERDVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetProgramEnvParameterdvARB");
+    glGetProgramEnvParameterfvARB   = (PFNGLGETPROGRAMENVPARAMETERFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetProgramEnvParameterfvARB");
+    glGetProgramLocalParameterdvARB = (PFNGLGETPROGRAMLOCALPARAMETERDVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetProgramLocalParameterdvARB");
+    glGetProgramLocalParameterfvARB = (PFNGLGETPROGRAMLOCALPARAMETERFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetProgramLocalParameterfvARB");
+    glGetProgramivARB               = (PFNGLGETPROGRAMIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetProgramivARB");
+    glGetProgramStringARB           = (PFNGLGETPROGRAMSTRINGARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetProgramStringARB");
+    glGetVertexAttribdvARB          = (PFNGLGETVERTEXATTRIBDVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetVertexAttribdvARB");
+    glGetVertexAttribfvARB          = (PFNGLGETVERTEXATTRIBFVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetVertexAttribfvARB");
+    glGetVertexAttribivARB          = (PFNGLGETVERTEXATTRIBIVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetVertexAttribivARB");
+    glGetVertexAttribPointervARB    = (PFNGLGETVERTEXATTRIBPOINTERVARBPROC) GLH_EXT_GET_PROC_ADDRESS("glGetVertexAttribPointervARB"); // entry-point names are case-sensitive; the former "glget..." spelling could never resolve
+    glIsProgramARB                  = (PFNGLISPROGRAMARBPROC) GLH_EXT_GET_PROC_ADDRESS("glIsProgramARB");
+
+    LL_DEBUGS("RenderInit") << "GL Probe: Got symbols" << LL_ENDL;
 #endif
 
-	mInited = TRUE;
+    mInited = TRUE;
 }
 
 void rotate_quat(LLQuaternion& rotation)
@@ -2116,7 +2078,7 @@ void LLGLState::checkClientArrays(const std::string& msg, U32 data_mask)
 	glClientActiveTextureARB(GL_TEXTURE0_ARB);
 	gGL.getTexUnit(0)->activate();
 
-	if (gGLManager.mHasVertexShader && LLGLSLShader::sNoFixedFunction)
+	if (LLGLSLShader::sNoFixedFunction)
 	{	//make sure vertex attribs are all disabled
 		GLint count;
 		glGetIntegerv(GL_MAX_VERTEX_ATTRIBS_ARB, &count);
diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h
index a07e2d9bb0..a03d5352be 100644
--- a/indra/llrender/llgl.h
+++ b/indra/llrender/llgl.h
@@ -94,9 +94,6 @@ public:
 	BOOL mHasMapBufferRange;
 	BOOL mHasFlushBufferRange;
 	BOOL mHasPBuffer;
-	BOOL mHasShaderObjects;
-	BOOL mHasVertexShader;
-	BOOL mHasFragmentShader;
 	S32  mNumTextureImageUnits;
 	BOOL mHasOcclusionQuery;
 	BOOL mHasTimerQuery;
diff --git a/indra/llrender/llglheaders.h b/indra/llrender/llglheaders.h
index 6bca3623e0..3d93cc0762 100644
--- a/indra/llrender/llglheaders.h
+++ b/indra/llrender/llglheaders.h
@@ -812,4 +812,23 @@ extern void glGetBufferPointervARB (GLenum, GLenum, GLvoid* *);
 #define GL_RENDERBUFFER_FREE_MEMORY_ATI            0x87FD
 #endif
 
+#if defined(TRACY_ENABLE) && LL_PROFILER_ENABLE_TRACY_OPENGL
+    // TracyOpenGL.hpp uses the un-suffixed names glGenQueries, glGetQueryiv
+    // and glGetQueryObjectiv. Alias them to the ARB entry points declared
+    // in this header before including it.
+    // NOTE: LL_PROFILER_GPU_ZONEC carries its own ';' -- call sites omit it.
+    #define glGenQueries        glGenQueriesARB
+    #define glGetQueryiv        glGetQueryivARB
+    #define glGetQueryObjectiv  glGetQueryObjectivARB
+    #include <tracy/TracyOpenGL.hpp>
+
+    #define LL_PROFILER_GPU_ZONEC(name,color) TracyGpuZoneC(name,color);
+    #define LL_PROFILER_GPU_COLLECT           TracyGpuCollect
+    #define LL_PROFILER_GPU_CONTEXT           TracyGpuContext
+#else
+    #define LL_PROFILER_GPU_ZONEC(name,color) (void)(name);(void)(color);
+    #define LL_PROFILER_GPU_COLLECT
+    #define LL_PROFILER_GPU_CONTEXT
+#endif
+
 #endif // LL_LLGLHEADERS_H
diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp
index 4351f6e2c8..2fb3b8257d 100644
--- a/indra/llrender/llglslshader.cpp
+++ b/indra/llrender/llglslshader.cpp
@@ -384,6 +384,8 @@ BOOL LLGLSLShader::createShader(std::vector<LLStaticHashedString> * attributes,
                                 U32 varying_count,
                                 const char** varyings)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     unloadInternal();
 
     sInstances.insert(this);
@@ -588,6 +590,8 @@ void LLGLSLShader::attachObjects(GLhandleARB* objects, S32 count)
 
 BOOL LLGLSLShader::mapAttributes(const std::vector<LLStaticHashedString> * attributes)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     //before linking, make sure reserved attributes always have consistent locations
     for (U32 i = 0; i < LLShaderMgr::instance()->mReservedAttribs.size(); i++)
     {
@@ -649,6 +653,8 @@ BOOL LLGLSLShader::mapAttributes(const std::vector<LLStaticHashedString> * attri
 
 void LLGLSLShader::mapUniform(GLint index, const vector<LLStaticHashedString> * uniforms)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     if (index == -1)
     {
         return;
@@ -770,6 +776,8 @@ void LLGLSLShader::removePermutation(std::string name)
 
 GLint LLGLSLShader::mapUniformTextureChannel(GLint location, GLenum type)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     if ((type >= GL_SAMPLER_1D_ARB && type <= GL_SAMPLER_2D_RECT_SHADOW_ARB) ||
         type == GL_SAMPLER_2D_MULTISAMPLE)
     {   //this here is a texture
@@ -782,7 +790,9 @@ GLint LLGLSLShader::mapUniformTextureChannel(GLint location, GLenum type)
 
 BOOL LLGLSLShader::mapUniforms(const vector<LLStaticHashedString> * uniforms)
 {
-	BOOL res = TRUE;
+    LL_PROFILE_ZONE_SCOPED;
+
+    BOOL res = TRUE;
 
 	mTotalUniformSize = 0;
 	mActiveTextureChannels = 0;
@@ -925,6 +935,8 @@ BOOL LLGLSLShader::mapUniforms(const vector<LLStaticHashedString> * uniforms)
 
 BOOL LLGLSLShader::link(BOOL suppress_errors)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     BOOL success = LLShaderMgr::instance()->linkProgramObject(mProgramObject, suppress_errors);
 
     if (!success && !suppress_errors)
@@ -937,56 +949,52 @@ BOOL LLGLSLShader::link(BOOL suppress_errors)
 
 void LLGLSLShader::bind()
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     gGL.flush();
-    if (gGLManager.mHasShaderObjects)
+
+    if (sCurBoundShader != mProgramObject)  // Don't re-bind current shader
     {
         LLVertexBuffer::unbind();
         glUseProgramObjectARB(mProgramObject);
         sCurBoundShader = mProgramObject;
         sCurBoundShaderPtr = this;
-        if (mUniformsDirty)
-        {
-            LLShaderMgr::instance()->updateShaderUniforms(this);
-            mUniformsDirty = FALSE;
-        }
+    }
+
+    if (mUniformsDirty)
+    {
+        LLShaderMgr::instance()->updateShaderUniforms(this);
+        mUniformsDirty = FALSE;
     }
 }
 
 void LLGLSLShader::unbind()
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     gGL.flush();
-    if (gGLManager.mHasShaderObjects)
-    {
-        stop_glerror();
-        if (gGLManager.mIsNVIDIA)
-        {
-            for (U32 i = 0; i < mAttribute.size(); ++i)
-            {
-                vertexAttrib4f(i, 0,0,0,1);
-                stop_glerror();
-            }
-        }
-        LLVertexBuffer::unbind();
-        glUseProgramObjectARB(0);
-        sCurBoundShader = 0;
-        sCurBoundShaderPtr = NULL;
-        stop_glerror();
-    }
+    stop_glerror();
+    LLVertexBuffer::unbind();
+    glUseProgramObjectARB(0);
+    sCurBoundShader = 0;
+    sCurBoundShaderPtr = NULL;
+    stop_glerror();
 }
 
 void LLGLSLShader::bindNoShader(void)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     LLVertexBuffer::unbind();
-    if (gGLManager.mHasShaderObjects)
-    {
-        glUseProgramObjectARB(0);
-        sCurBoundShader = 0;
-        sCurBoundShaderPtr = NULL;
-    }
+    glUseProgramObjectARB(0);
+    sCurBoundShader = 0;
+    sCurBoundShaderPtr = NULL;
 }
 
 S32 LLGLSLShader::bindTexture(const std::string &uniform, LLTexture *texture, LLTexUnit::eTextureType mode, LLTexUnit::eTextureColorSpace colorspace)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     S32 channel = 0;
     channel = getUniformLocation(uniform);
     
@@ -995,6 +1003,8 @@ S32 LLGLSLShader::bindTexture(const std::string &uniform, LLTexture *texture, LL
 
 S32 LLGLSLShader::bindTexture(S32 uniform, LLTexture *texture, LLTexUnit::eTextureType mode, LLTexUnit::eTextureColorSpace colorspace)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     if (uniform < 0 || uniform >= (S32)mTexture.size())
     {
         LL_SHADER_UNIFORM_ERRS() << "Uniform out of range: " << uniform << LL_ENDL;
@@ -1014,6 +1024,8 @@ S32 LLGLSLShader::bindTexture(S32 uniform, LLTexture *texture, LLTexUnit::eTextu
 
 S32 LLGLSLShader::unbindTexture(const std::string &uniform, LLTexUnit::eTextureType mode)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     S32 channel = 0;
     channel = getUniformLocation(uniform);
     
@@ -1022,6 +1034,8 @@ S32 LLGLSLShader::unbindTexture(const std::string &uniform, LLTexUnit::eTextureT
 
 S32 LLGLSLShader::unbindTexture(S32 uniform, LLTexUnit::eTextureType mode)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     if (uniform < 0 || uniform >= (S32)mTexture.size())
     {
         LL_SHADER_UNIFORM_ERRS() << "Uniform out of range: " << uniform << LL_ENDL;
@@ -1040,6 +1054,8 @@ S32 LLGLSLShader::unbindTexture(S32 uniform, LLTexUnit::eTextureType mode)
 
 S32 LLGLSLShader::enableTexture(S32 uniform, LLTexUnit::eTextureType mode, LLTexUnit::eTextureColorSpace space)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     if (uniform < 0 || uniform >= (S32)mTexture.size())
     {
         LL_SHADER_UNIFORM_ERRS() << "Uniform out of range: " << uniform << LL_ENDL;
@@ -1057,6 +1073,8 @@ S32 LLGLSLShader::enableTexture(S32 uniform, LLTexUnit::eTextureType mode, LLTex
 
 S32 LLGLSLShader::disableTexture(S32 uniform, LLTexUnit::eTextureType mode, LLTexUnit::eTextureColorSpace space)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     if (uniform < 0 || uniform >= (S32)mTexture.size())
     {
         LL_SHADER_UNIFORM_ERRS() << "Uniform out of range: " << uniform << LL_ENDL;
@@ -1346,6 +1364,8 @@ void LLGLSLShader::uniformMatrix3fv(U32 index, U32 count, GLboolean transpose, c
 
 void LLGLSLShader::uniformMatrix3x4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
 	if (mProgramObject)
 	{	
 		if (mUniform.size() <= index)
@@ -1380,6 +1400,8 @@ void LLGLSLShader::uniformMatrix4fv(U32 index, U32 count, GLboolean transpose, c
 
 GLint LLGLSLShader::getUniformLocation(const LLStaticHashedString& uniform)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     GLint ret = -1;
     if (mProgramObject)
     {
@@ -1404,6 +1426,8 @@ GLint LLGLSLShader::getUniformLocation(const LLStaticHashedString& uniform)
 
 GLint LLGLSLShader::getUniformLocation(U32 index)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     GLint ret = -1;
     if (mProgramObject)
     {
@@ -1416,6 +1440,8 @@ GLint LLGLSLShader::getUniformLocation(U32 index)
 
 GLint LLGLSLShader::getAttribLocation(U32 attrib)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     if (attrib < mAttribute.size())
     {
         return mAttribute[attrib];
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index 0151d20128..276fa55e15 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -1304,7 +1304,10 @@ void LLImageGL::setManualImage(U32 target, S32 miplevel, S32 intformat, S32 widt
 	}
 
 	stop_glerror();
-	glTexImage2D(target, miplevel, intformat, width, height, 0, pixformat, pixtype, use_scratch ? scratch : pixels);
+	{
+		LL_PROFILE_ZONE_NAMED("glTexImage2D");
+		glTexImage2D(target, miplevel, intformat, width, height, 0, pixformat, pixtype, use_scratch ? scratch : pixels);
+	}
 	stop_glerror();
 
 	if (use_scratch)
diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp
index 03b6aac20c..43b4441ea8 100644
--- a/indra/llrender/llrender.cpp
+++ b/indra/llrender/llrender.cpp
@@ -1848,6 +1848,7 @@ LLLightState* LLRender::getLight(U32 index)
 
 void LLRender::setAmbientLightColor(const LLColor4& color)
 {
+	LL_PROFILE_ZONE_SCOPED;
 	if (color != mAmbientLightColor)
 	{
 		++mLightHash;
diff --git a/indra/llrender/llrendertarget.cpp b/indra/llrender/llrendertarget.cpp
index e3c0255290..401085a00b 100644
--- a/indra/llrender/llrendertarget.cpp
+++ b/indra/llrender/llrendertarget.cpp
@@ -437,11 +437,13 @@ void LLRenderTarget::bindTarget()
 									GL_COLOR_ATTACHMENT1,
 									GL_COLOR_ATTACHMENT2,
 									GL_COLOR_ATTACHMENT3};
+			LL_PROFILER_GPU_ZONEC( "gl.DrawBuffersARB", 0x4000FF )
 			glDrawBuffersARB(mTex.size(), drawbuffers);
 		}
 			
 		if (mTex.empty())
 		{ //no color buffer to draw to
+			LL_PROFILER_GPU_ZONEC( "gl.DrawBuffer", 0x0000FF )
 			glDrawBuffer(GL_NONE);
 			glReadBuffer(GL_NONE);
 		}
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 7d2b09ca4a..0449ac392c 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -91,6 +91,8 @@ LLVBOPool LLVertexBuffer::sDynamicIBOPool(GL_DYNAMIC_DRAW_ARB, GL_ELEMENT_ARRAY_
 
 U32 LLVBOPool::sBytesPooled = 0;
 U32 LLVBOPool::sIndexBytesPooled = 0;
+U32 LLVBOPool::sNameIdx = 0;
+U32 LLVBOPool::sNamePool[1024];
 
 std::list<U32> LLVertexBuffer::sAvailableVAOName;
 U32 LLVertexBuffer::sCurVAOName = 1;
@@ -121,15 +123,20 @@ bool LLVertexBuffer::sPreferStreamDraw = false;
 
 U32 LLVBOPool::genBuffer()
 {
-	U32 ret = 0;
+	LL_PROFILE_ZONE_SCOPED;
 
-	glGenBuffersARB(1, &ret);
-	
-	return ret;
+	if (sNameIdx == 0)
+	{
+		glGenBuffersARB(1024, sNamePool);
+		sNameIdx = 1024;
+	}
+
+	return sNamePool[--sNameIdx];
 }
 
 void LLVBOPool::deleteBuffer(U32 name)
 {
+	LL_PROFILE_ZONE_SCOPED;
 	if (gGLManager.mInited)
 	{
 		LLVertexBuffer::unbind();
@@ -152,6 +159,7 @@ LLVBOPool::LLVBOPool(U32 vboUsage, U32 vboType)
 
 volatile U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed)
 {
+	LL_PROFILE_ZONE_SCOPED;
 	llassert(vbo_block_size(size) == size);
 	
 	volatile U8* ret = NULL;
@@ -267,10 +275,12 @@ void LLVBOPool::release(U32 name, volatile U8* buffer, U32 size)
 
 void LLVBOPool::seedPool()
 {
+	LL_PROFILE_ZONE_SCOPED;
 	U32 dummy_name = 0;
 
 	if (mFreeList.size() < LL_VBO_POOL_SEED_COUNT)
 	{
+		LL_PROFILE_ZONE_NAMED("VBOPool Resize");
 		mFreeList.resize(LL_VBO_POOL_SEED_COUNT);
 	}
 
@@ -411,6 +421,7 @@ void LLVertexBuffer::releaseVAOName(U32 name)
 //static
 void LLVertexBuffer::seedPools()
 {
+	LL_PROFILE_ZONE_SCOPED;
 	sStreamVBOPool.seedPool();
 	sDynamicVBOPool.seedPool();
 	sDynamicCopyVBOPool.seedPool();
@@ -615,6 +626,7 @@ void LLVertexBuffer::drawArrays(U32 mode, const std::vector<LLVector3>& pos, con
 		glNormalPointer(GL_FLOAT, 0, norm[0].mV);
 	}
 	LLGLSLShader::startProfile();
+	LL_PROFILER_GPU_ZONEC( "gl.DrawArrays", 0xFF0000 )
 	glDrawArrays(sGLMode[mode], 0, count);
 	LLGLSLShader::stopProfile(count, mode);
 }
@@ -654,6 +666,7 @@ void LLVertexBuffer::drawElements(U32 mode, const LLVector4a* pos, const LLVecto
 	}
 
 	LLGLSLShader::startProfile();
+    LL_PROFILER_GPU_ZONEC( "gl.DrawElements", 0x80FF80 )
 	glDrawElements(sGLMode[mode], num_indices, GL_UNSIGNED_SHORT, indicesp);
 	LLGLSLShader::stopProfile(num_indices, mode);
 }
@@ -763,6 +776,7 @@ void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indi
 
 	stop_glerror();
 	LLGLSLShader::startProfile();
+    LL_PROFILER_GPU_ZONEC( "gl.DrawRangeElements", 0xFFFF00 )
 	glDrawRangeElements(sGLMode[mode], start, end, count, GL_UNSIGNED_SHORT, 
 		idx);
 	LLGLSLShader::stopProfile(count, mode);
@@ -814,6 +828,7 @@ void LLVertexBuffer::draw(U32 mode, U32 count, U32 indices_offset) const
 
 	stop_glerror();
 	LLGLSLShader::startProfile();
+    LL_PROFILER_GPU_ZONEC( "gl.DrawElements", 0xA0FFA0 )
 	glDrawElements(sGLMode[mode], count, GL_UNSIGNED_SHORT,
 		((U16*) getIndicesPointer()) + indices_offset);
 	LLGLSLShader::stopProfile(count, mode);
@@ -861,6 +876,7 @@ void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const
 		stop_glerror();
 		LLGLSLShader::startProfile();
 		stop_glerror();
+        LL_PROFILER_GPU_ZONEC( "gl.DrawArrays", 0xFF4040 )
 		glDrawArrays(sGLMode[mode], first, count);
 		stop_glerror();
 		LLGLSLShader::stopProfile(count, mode);
diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h
index dbe1a3687f..1d60970df4 100644
--- a/indra/llrender/llvertexbuffer.h
+++ b/indra/llrender/llvertexbuffer.h
@@ -89,6 +89,9 @@ public:
 	std::vector<record_list_t> mFreeList;
 	std::vector<U32> mMissCount;
 
+	//used to avoid calling glGenBuffers for every VBO creation
+	static U32 sNamePool[1024];
+	static U32 sNameIdx;
 };
 
 
@@ -127,7 +130,7 @@ public:
 	static LLVBOPool sDynamicCopyVBOPool;
 	static LLVBOPool sStreamIBOPool;
 	static LLVBOPool sDynamicIBOPool;
-	
+
 	static std::list<U32> sAvailableVAOName;
 	static U32 sCurVAOName;
 
diff --git a/indra/llui/llnotifications.cpp b/indra/llui/llnotifications.cpp
index b791a19c2b..88eda1c172 100644
--- a/indra/llui/llnotifications.cpp
+++ b/indra/llui/llnotifications.cpp
@@ -1387,7 +1387,7 @@ bool LLNotifications::failedUniquenessTest(const LLSD& payload)
 
 LLNotificationChannelPtr LLNotifications::getChannel(const std::string& channelName)
 {
-	return LLNotificationChannelPtr(LLNotificationChannel::getInstance(channelName));
+	return LLNotificationChannelPtr(LLNotificationChannel::getInstance(channelName).get());
 }
 
 
diff --git a/indra/llui/llstatbar.cpp b/indra/llui/llstatbar.cpp
index 6c8e63442b..8adcd664df 100644
--- a/indra/llui/llstatbar.cpp
+++ b/indra/llui/llstatbar.cpp
@@ -554,29 +554,25 @@ void LLStatBar::draw()
 void LLStatBar::setStat(const std::string& stat_name)
 {
 	using namespace LLTrace;
-	const StatType<CountAccumulator>*	count_stat;
-	const StatType<EventAccumulator>*	event_stat;
-	const StatType<SampleAccumulator>*	sample_stat;
-	const StatType<MemAccumulator>*		mem_stat;
 
-	if ((count_stat = StatType<CountAccumulator>::getInstance(stat_name)))
+	if (auto count_stat = StatType<CountAccumulator>::getInstance(stat_name))
 	{
-		mStat.countStatp = count_stat;
+		mStat.countStatp = count_stat.get();
 		mStatType = STAT_COUNT;
 	}
-	else if ((event_stat = StatType<EventAccumulator>::getInstance(stat_name)))
+	else if (auto event_stat = StatType<EventAccumulator>::getInstance(stat_name))
 	{
-		mStat.eventStatp = event_stat;
+		mStat.eventStatp = event_stat.get();
 		mStatType = STAT_EVENT;
 	}
-	else if ((sample_stat = StatType<SampleAccumulator>::getInstance(stat_name)))
+	else if (auto sample_stat = StatType<SampleAccumulator>::getInstance(stat_name))
 	{
-		mStat.sampleStatp = sample_stat;
+		mStat.sampleStatp = sample_stat.get();
 		mStatType = STAT_SAMPLE;
 	}
-	else if ((mem_stat = StatType<MemAccumulator>::getInstance(stat_name)))
+	else if (auto mem_stat = StatType<MemAccumulator>::getInstance(stat_name))
 	{
-		mStat.memStatp = mem_stat;
+		mStat.memStatp = mem_stat.get();
 		mStatType = STAT_MEM;
 	}
 }
diff --git a/indra/llwindow/llwindowwin32.cpp b/indra/llwindow/llwindowwin32.cpp
index b2b123f0da..4ce7c30bef 100644
--- a/indra/llwindow/llwindowwin32.cpp
+++ b/indra/llwindow/llwindowwin32.cpp
@@ -28,6 +28,8 @@
 
 #if LL_WINDOWS && !LL_MESA_HEADLESS
 
+#define LL_WINDOW_SINGLE_THREADED 0
+
 #include "llwindowwin32.h"
 
 // LLWindow library includes
@@ -45,6 +47,7 @@
 #include "lldir.h"
 #include "llsdutil.h"
 #include "llglslshader.h"
+#include "llthreadsafequeue.h"
 
 // System includes
 #include <commdlg.h>
@@ -79,6 +82,18 @@ const F32	ICON_FLASH_TIME = 0.5f;
 
 extern BOOL gDebugWindowProc;
 
+static std::thread::id sWindowThreadId;
+static std::thread::id sMainThreadId;
+
+#if 1 || LL_WINDOW_SINGLE_THREADED
+#define ASSERT_MAIN_THREAD()
+#define ASSERT_WINDOW_THREAD()
+#else
+#define ASSERT_MAIN_THREAD() llassert(LLThread::currentID() == sMainThreadId)
+#define ASSERT_WINDOW_THREAD() llassert(LLThread::currentID() == sWindowThreadId)
+#endif
+
+
 LPWSTR gIconResource = IDI_APPLICATION;
 LPDIRECTINPUT8 gDirectInput8;
 
@@ -294,7 +309,7 @@ LLWinImm::LLWinImm() : mHImmDll(NULL)
 
 
 // static 
-BOOL	LLWinImm::isIME(HKL hkl)															
+BOOL	LLWinImm::isIME(HKL hkl)
 { 
 	if ( sTheInstance.mImmIsIME )
 		return sTheInstance.mImmIsIME(hkl); 
@@ -326,7 +341,7 @@ BOOL		LLWinImm::getOpenStatus(HIMC himc)
 }
 
 // static 
-BOOL		LLWinImm::setOpenStatus(HIMC himc, BOOL status)									
+BOOL		LLWinImm::setOpenStatus(HIMC himc, BOOL status)
 { 
 	if ( sTheInstance.mImmSetOpenStatus )
 		return sTheInstance.mImmSetOpenStatus(himc, status); 
@@ -454,6 +469,8 @@ private:
 
 static LLMonitorInfo sMonitorInfo;
 
+
+
 LLWindowWin32::LLWindowWin32(LLWindowCallbacks* callbacks,
 							 const std::string& title, const std::string& name, S32 x, S32 y, S32 width,
 							 S32 height, U32 flags, 
@@ -463,7 +480,11 @@ LLWindowWin32::LLWindowWin32(LLWindowCallbacks* callbacks,
 							 U32 fsaa_samples)
 	: LLWindow(callbacks, fullscreen, flags)
 {
-	
+    sMainThreadId = LLThread::currentID();
+    mWindowThread = new LLWindowWin32Thread(this);
+#if !LL_WINDOW_SINGLE_THREADED
+    mWindowThread->start();
+#endif
 	//MAINT-516 -- force a load of opengl32.dll just in case windows went sideways 
 	LoadLibrary(L"opengl32.dll");
 
@@ -784,7 +805,7 @@ LLWindowWin32::LLWindowWin32(LLWindowCallbacks* callbacks,
 	LLCoordScreen windowPos(x,y);
 	LLCoordScreen windowSize(window_rect.right - window_rect.left,
 							 window_rect.bottom - window_rect.top);
-	if (!switchContext(mFullscreen, windowSize, TRUE, &windowPos))
+	if (!switchContext(mFullscreen, windowSize, disable_vsync, &windowPos))
 	{
 		return;
 	}
@@ -811,6 +832,8 @@ LLWindowWin32::~LLWindowWin32()
 
 	delete [] mWindowClassName;
 	mWindowClassName = NULL;
+    
+    delete mWindowThread;
 }
 
 void LLWindowWin32::show()
@@ -930,26 +953,35 @@ void LLWindowWin32::close()
 
 	LL_DEBUGS("Window") << "Destroying Window" << LL_ENDL;
 
-    if (IsWindow(mWindowHandle))
-    {
-        // Make sure we don't leave a blank toolbar button.
-        ShowWindow(mWindowHandle, SW_HIDE);
-
-        // This causes WM_DESTROY to be sent *immediately*
-        if (!destroy_window_handler(mWindowHandle))
+    mWindowThread->post([this]()
         {
-            OSMessageBox(mCallbacks->translateString("MBDestroyWinFailed"),
-                mCallbacks->translateString("MBShutdownErr"),
-                OSMB_OK);
-        }
-    }
-    else
+            if (IsWindow(mWindowHandle))
+            {
+                // Make sure we don't leave a blank toolbar button.
+                ShowWindow(mWindowHandle, SW_HIDE);
+
+                // This causes WM_DESTROY to be sent *immediately*
+                if (!destroy_window_handler(mWindowHandle))
+                {
+                    OSMessageBox(mCallbacks->translateString("MBDestroyWinFailed"),
+                        mCallbacks->translateString("MBShutdownErr"),
+                        OSMB_OK);
+                }
+            }
+            else
+            {
+                // Something killed the window while we were busy destroying gl or handle somehow got broken
+                LL_WARNS("Window") << "Failed to destroy Window, invalid handle!" << LL_ENDL;
+            }
+            mWindowHandle = NULL;
+
+            mWindowThread->mFinished = true;
+        });
+
+    while (!mWindowThread->isStopped())
     {
-        // Something killed the window while we were busy destroying gl or handle somehow got broken
-        LL_WARNS("Window") << "Failed to destroy Window, invalid handle!" << LL_ENDL;
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
     }
-
-	mWindowHandle = NULL;
 }
 
 BOOL LLWindowWin32::isValid()
@@ -1090,171 +1122,203 @@ BOOL LLWindowWin32::setSizeImpl(const LLCoordWindow size)
 }
 
 // changing fullscreen resolution
-BOOL LLWindowWin32::switchContext(BOOL fullscreen, const LLCoordScreen &size, BOOL disable_vsync, const LLCoordScreen * const posp)
+BOOL LLWindowWin32::switchContext(BOOL fullscreen, const LLCoordScreen& size, BOOL disable_vsync, const LLCoordScreen* const posp)
 {
-	GLuint	pixel_format;
-	DEVMODE dev_mode;
-	::ZeroMemory(&dev_mode, sizeof(DEVMODE));
-	dev_mode.dmSize = sizeof(DEVMODE);
-	DWORD	current_refresh;
-	DWORD	dw_ex_style;
-	DWORD	dw_style;
-	RECT	window_rect = {0, 0, 0, 0};
-	S32 width = size.mX;
-	S32 height = size.mY;
-	BOOL auto_show = FALSE;
-
-	if (mhRC)	
-	{
-		auto_show = TRUE;
-		resetDisplayResolution();
-	}
-
-	if (EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dev_mode))
-	{
-		current_refresh = dev_mode.dmDisplayFrequency;
-	}
-	else
-	{
-		current_refresh = 60;
-	}
-
-	gGLManager.shutdownGL();
-	//destroy gl context
-	if (mhRC)
-	{
-		if (!wglMakeCurrent(NULL, NULL))
-		{
-			LL_WARNS("Window") << "Release of DC and RC failed" << LL_ENDL;
-		}
+    //called from main thread
+    GLuint	pixel_format;
+    DEVMODE dev_mode;
+    ::ZeroMemory(&dev_mode, sizeof(DEVMODE));
+    dev_mode.dmSize = sizeof(DEVMODE);
+    DWORD	current_refresh;
+    DWORD	dw_ex_style;
+    DWORD	dw_style;
+    RECT	window_rect = { 0, 0, 0, 0 };
+    S32 width = size.mX;
+    S32 height = size.mY;
+    BOOL auto_show = FALSE;
+
+    if (mhRC)
+    {
+        auto_show = TRUE;
+        resetDisplayResolution();
+    }
 
-		if (!wglDeleteContext(mhRC))
-		{
-			LL_WARNS("Window") << "Release of rendering context failed" << LL_ENDL;
-		}
+    if (EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dev_mode))
+    {
+        current_refresh = dev_mode.dmDisplayFrequency;
+    }
+    else
+    {
+        current_refresh = 60;
+    }
 
-		mhRC = NULL;
-	}
+    gGLManager.shutdownGL();
+    //destroy gl context
+    if (mhRC)
+    {
+        if (!wglMakeCurrent(NULL, NULL))
+        {
+            LL_WARNS("Window") << "Release of DC and RC failed" << LL_ENDL;
+        }
 
-	if (fullscreen)
-	{
-		mFullscreen = TRUE;
-		BOOL success = FALSE;
-		DWORD closest_refresh = 0;
+        if (!wglDeleteContext(mhRC))
+        {
+            LL_WARNS("Window") << "Release of rendering context failed" << LL_ENDL;
+        }
 
-		for (S32 mode_num = 0;; mode_num++)
-		{
-			if (!EnumDisplaySettings(NULL, mode_num, &dev_mode))
-			{
-				break;
-			}
+        mhRC = NULL;
+    }
 
-			if (dev_mode.dmPelsWidth == width &&
-				dev_mode.dmPelsHeight == height &&
-				dev_mode.dmBitsPerPel == BITS_PER_PIXEL)
-			{
-				success = TRUE;
-				if ((dev_mode.dmDisplayFrequency - current_refresh)
-					< (closest_refresh - current_refresh))
-				{
-					closest_refresh = dev_mode.dmDisplayFrequency;
-				}
-			}
-		}
+    if (fullscreen)
+    {
+        mFullscreen = TRUE;
+        BOOL success = FALSE;
+        DWORD closest_refresh = 0;
 
-		if (closest_refresh == 0)
-		{
-			LL_WARNS("Window") << "Couldn't find display mode " << width << " by " << height << " at " << BITS_PER_PIXEL << " bits per pixel" << LL_ENDL;
-			return FALSE;
-		}
+        for (S32 mode_num = 0;; mode_num++)
+        {
+            if (!EnumDisplaySettings(NULL, mode_num, &dev_mode))
+            {
+                break;
+            }
 
-		// If we found a good resolution, use it.
-		if (success)
-		{
-			success = setDisplayResolution(width, height, BITS_PER_PIXEL, closest_refresh);
-		}
+            if (dev_mode.dmPelsWidth == width &&
+                dev_mode.dmPelsHeight == height &&
+                dev_mode.dmBitsPerPel == BITS_PER_PIXEL)
+            {
+                success = TRUE;
+                if ((dev_mode.dmDisplayFrequency - current_refresh)
+                    < (closest_refresh - current_refresh))
+                {
+                    closest_refresh = dev_mode.dmDisplayFrequency;
+                }
+            }
+        }
 
-		// Keep a copy of the actual current device mode in case we minimize 
-		// and change the screen resolution.   JC
-		EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dev_mode);
+        if (closest_refresh == 0)
+        {
+            LL_WARNS("Window") << "Couldn't find display mode " << width << " by " << height << " at " << BITS_PER_PIXEL << " bits per pixel" << LL_ENDL;
+            return FALSE;
+        }
 
-		if (success)
-		{
-			mFullscreen = TRUE;
-			mFullscreenWidth   = dev_mode.dmPelsWidth;
-			mFullscreenHeight  = dev_mode.dmPelsHeight;
-			mFullscreenBits    = dev_mode.dmBitsPerPel;
-			mFullscreenRefresh = dev_mode.dmDisplayFrequency;
+        // If we found a good resolution, use it.
+        if (success)
+        {
+            success = setDisplayResolution(width, height, BITS_PER_PIXEL, closest_refresh);
+        }
 
-			LL_INFOS("Window") << "Running at " << dev_mode.dmPelsWidth
-				<< "x"   << dev_mode.dmPelsHeight
-				<< "x"   << dev_mode.dmBitsPerPel
-				<< " @ " << dev_mode.dmDisplayFrequency
-				<< LL_ENDL;
+        // Keep a copy of the actual current device mode in case we minimize 
+        // and change the screen resolution.   JC
+        EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dev_mode);
 
-			window_rect.left = (long) 0;
-			window_rect.right = (long) width;			// Windows GDI rects don't include rightmost pixel
-			window_rect.top = (long) 0;
-			window_rect.bottom = (long) height;
-			dw_ex_style = WS_EX_APPWINDOW;
-			dw_style = WS_POPUP;
+        if (success)
+        {
+            mFullscreen = TRUE;
+            mFullscreenWidth = dev_mode.dmPelsWidth;
+            mFullscreenHeight = dev_mode.dmPelsHeight;
+            mFullscreenBits = dev_mode.dmBitsPerPel;
+            mFullscreenRefresh = dev_mode.dmDisplayFrequency;
+
+            LL_INFOS("Window") << "Running at " << dev_mode.dmPelsWidth
+                << "x" << dev_mode.dmPelsHeight
+                << "x" << dev_mode.dmBitsPerPel
+                << " @ " << dev_mode.dmDisplayFrequency
+                << LL_ENDL;
+
+            window_rect.left = (long)0;
+            window_rect.right = (long)width;			// Windows GDI rects don't include rightmost pixel
+            window_rect.top = (long)0;
+            window_rect.bottom = (long)height;
+            dw_ex_style = WS_EX_APPWINDOW;
+            dw_style = WS_POPUP;
+
+            // Move window borders out not to cover window contents.
+            // This converts client rect to window rect, i.e. expands it by the window border size.
+            AdjustWindowRectEx(&window_rect, dw_style, FALSE, dw_ex_style);
+        }
+        // If it failed, we don't want to run fullscreen
+        else
+        {
+            mFullscreen = FALSE;
+            mFullscreenWidth = -1;
+            mFullscreenHeight = -1;
+            mFullscreenBits = -1;
+            mFullscreenRefresh = -1;
 
-			// Move window borders out not to cover window contents.
-			// This converts client rect to window rect, i.e. expands it by the window border size.
-			AdjustWindowRectEx(&window_rect, dw_style, FALSE, dw_ex_style);
-		}
-		// If it failed, we don't want to run fullscreen
-		else
-		{
-			mFullscreen = FALSE;
-			mFullscreenWidth   = -1;
-			mFullscreenHeight  = -1;
-			mFullscreenBits    = -1;
-			mFullscreenRefresh = -1;
+            LL_INFOS("Window") << "Unable to run fullscreen at " << width << "x" << height << LL_ENDL;
+            return FALSE;
+        }
+    }
+    else
+    {
+        mFullscreen = FALSE;
+        window_rect.left = (long)(posp ? posp->mX : 0);
+        window_rect.right = (long)width + window_rect.left;			// Windows GDI rects don't include rightmost pixel
+        window_rect.top = (long)(posp ? posp->mY : 0);
+        window_rect.bottom = (long)height + window_rect.top;
+        // Window with an edge
+        dw_ex_style = WS_EX_APPWINDOW | WS_EX_WINDOWEDGE;
+        dw_style = WS_OVERLAPPEDWINDOW;
+    }
 
-			LL_INFOS("Window") << "Unable to run fullscreen at " << width << "x" << height << LL_ENDL;
-			return FALSE;
-		}
-	}
-	else
-	{
-		mFullscreen = FALSE;
-		window_rect.left = (long) (posp ? posp->mX : 0);
-		window_rect.right = (long) width + window_rect.left;			// Windows GDI rects don't include rightmost pixel
-		window_rect.top = (long) (posp ? posp->mY : 0);
-		window_rect.bottom = (long) height + window_rect.top;
-		// Window with an edge
-		dw_ex_style = WS_EX_APPWINDOW | WS_EX_WINDOWEDGE;
-		dw_style = WS_OVERLAPPEDWINDOW;
-	}
 
+    // don't post quit messages when destroying old windows
+    mPostQuit = FALSE;
 
-	// don't post quit messages when destroying old windows
-	mPostQuit = FALSE;
 
-	// create window
+    // create window
     LL_DEBUGS("Window") << "Creating window with X: " << window_rect.left
         << " Y: " << window_rect.top
         << " Width: " << (window_rect.right - window_rect.left)
         << " Height: " << (window_rect.bottom - window_rect.top)
         << " Fullscreen: " << mFullscreen
         << LL_ENDL;
-    if (mWindowHandle && !destroy_window_handler(mWindowHandle))
+
+    auto oldHandle = mWindowHandle;
+
+    //zero out mWindowHandle and mhDC before destroying window so window thread falls back to peekmessage
+    mWindowHandle = 0;
+    mhDC = 0;
+
+    if (oldHandle && !destroy_window_handler(oldHandle))
     {
         LL_WARNS("Window") << "Failed to properly close window before recreating it!" << LL_ENDL;
-    }	
-	mWindowHandle = CreateWindowEx(dw_ex_style,
-		mWindowClassName,
-		mWindowTitle,
-		WS_CLIPSIBLINGS | WS_CLIPCHILDREN | dw_style,
-		window_rect.left,								// x pos
-		window_rect.top,								// y pos
-		window_rect.right - window_rect.left,			// width
-		window_rect.bottom - window_rect.top,			// height
-		NULL,
-		NULL,
-		mhInstance,
-		NULL);
+    }
+
+    mWindowHandle = NULL;
+    mhDC = 0;
+
+    mWindowThread->post(
+        [this, window_rect, dw_ex_style, dw_style]()
+        {
+            mWindowHandle = CreateWindowEx(dw_ex_style,
+                mWindowClassName,
+                mWindowTitle,
+                WS_CLIPSIBLINGS | WS_CLIPCHILDREN | dw_style,
+                window_rect.left,								// x pos
+                window_rect.top,								// y pos
+                window_rect.right - window_rect.left,			// width
+                window_rect.bottom - window_rect.top,			// height
+                NULL,
+                NULL,
+                mhInstance,
+                NULL);
+
+            if (mWindowHandle)
+            {
+                mhDC = GetDC(mWindowHandle);
+            }
+        }
+    );
+
+    // HACK wait for above handle to become populated
+    // TODO: use a future
+    int count = 1024;
+    while (!mhDC && count > 0)
+    {
+        Sleep(10);
+        --count;
+    }
 
 	if (mWindowHandle)
 	{
@@ -1288,7 +1352,7 @@ BOOL LLWindowWin32::switchContext(BOOL fullscreen, const LLCoordScreen &size, BO
 			0, 0, 0
 	};
 
-	if (!(mhDC = GetDC(mWindowHandle)))
+	if (!mhDC)
 	{
 		close();
 		OSMessageBox(mCallbacks->translateString("MBDevContextErr"),
@@ -1582,25 +1646,48 @@ const	S32   max_format  = (S32)num_formats - 1;
 			mhDC = 0;											// Zero The Device Context
 		}
 
+        auto oldHandle = mWindowHandle;
+        mWindowHandle = NULL;
+        mhDC = 0;
+
         // Destroy The Window
-        if (mWindowHandle && !destroy_window_handler(mWindowHandle))
+        if (oldHandle && !destroy_window_handler(oldHandle))
         {
             LL_WARNS("Window") << "Failed to properly close window!" << LL_ENDL;
         }		
 
-		mWindowHandle = CreateWindowEx(dw_ex_style,
-			mWindowClassName,
-			mWindowTitle,
-			WS_CLIPSIBLINGS | WS_CLIPCHILDREN | dw_style,
-			window_rect.left,								// x pos
-			window_rect.top,								// y pos
-			window_rect.right - window_rect.left,			// width
-			window_rect.bottom - window_rect.top,			// height
-			NULL,
-			NULL,
-			mhInstance,
-			NULL);
+        mWindowThread->post(
+            [this, window_rect, dw_ex_style, dw_style]()
+            {
+                mWindowHandle = CreateWindowEx(dw_ex_style,
+                    mWindowClassName,
+                    mWindowTitle,
+                    WS_CLIPSIBLINGS | WS_CLIPCHILDREN | dw_style,
+                    window_rect.left,								// x pos
+                    window_rect.top,								// y pos
+                    window_rect.right - window_rect.left,			// width
+                    window_rect.bottom - window_rect.top,			// height
+                    NULL,
+                    NULL,
+                    mhInstance,
+                    NULL);
+
+                if (mWindowHandle)
+                {
+                    mhDC = GetDC(mWindowHandle);
+                }
+            }
+        );
 
+        // HACK wait for above handle to become populated
+        // TODO: use a future
+        int count = 1024;
+        while (!mhDC && count > 0)
+        {
+            PostMessage(oldHandle, WM_USER + 8, 0x1717, 0x3b3b);
+            Sleep(10);
+            --count;
+        }
 
 		if (mWindowHandle)
 		{
@@ -1612,7 +1699,7 @@ const	S32   max_format  = (S32)num_formats - 1;
 			LL_WARNS("Window") << "Window recreation failed, code: " << GetLastError() << LL_ENDL;
 		}
 
-		if (!(mhDC = GetDC(mWindowHandle)))
+		if (!mhDC)
 		{
 			close();
 			OSMessageBox(mCallbacks->translateString("MBDevContextErr"), mCallbacks->translateString("MBError"), OSMB_OK);
@@ -1748,6 +1835,8 @@ const	S32   max_format  = (S32)num_formats - 1;
 		return FALSE;
 	}
 
+	LL_PROFILER_GPU_CONTEXT
+
 	if (!gGLManager.initGL())
 	{
 		close();
@@ -1764,6 +1853,7 @@ const	S32   max_format  = (S32)num_formats - 1;
 	else
 	{
 		LL_DEBUGS("Window") << "Keeping vertical sync" << LL_ENDL;
+        wglSwapIntervalEXT(1);
 	}
 
 	SetWindowLongPtr(mWindowHandle, GWLP_USERDATA, (LONG_PTR)this);
@@ -1813,31 +1903,41 @@ void LLWindowWin32::moveWindow( const LLCoordScreen& position, const LLCoordScre
 
 BOOL LLWindowWin32::setCursorPosition(const LLCoordWindow position)
 {
-	mMousePositionModified = TRUE;
+    ASSERT_MAIN_THREAD();
+
 	if (!mWindowHandle)
 	{
 		return FALSE;
 	}
 
-
-	// Inform the application of the new mouse position (needed for per-frame
+    // Report the new mouse position to the application up front (needed for per-frame
 	// hover/picking to function).
 	mCallbacks->handleMouseMove(this, position.convert(), (MASK)0);
 	
-	// DEV-18951 VWR-8524 Camera moves wildly when alt-clicking.
-	// Because we have preemptively notified the application of the new
-	// mouse position via handleMouseMove() above, we need to clear out
-	// any stale mouse move events.  RN/JC
-	MSG msg;
-	while (PeekMessage(&msg, NULL, WM_MOUSEMOVE, WM_MOUSEMOVE, PM_REMOVE))
-	{ }
-
-	LLCoordScreen screen_pos(position.convert());
-	return ::SetCursorPos(screen_pos.mX, screen_pos.mY);
+    mMousePositionModified = TRUE;
+    LLCoordScreen target_pos(position.convert());
+
+    mWindowThread->post([=]()
+        {
+            ::SetCursorPos(target_pos.mX, target_pos.mY);
+            // DEV-18951 VWR-8524: the camera moved wildly when alt-clicking.
+            // handleMouseMove() above has already told the application where
+            // the mouse is, so any WM_MOUSEMOVE still queued for the thread
+            // running this lambda is stale and is discarded here.  RN/JC
+            MSG stale_move;
+            while (PeekMessage(&stale_move, NULL, WM_MOUSEMOVE, WM_MOUSEMOVE, PM_REMOVE))
+            {
+                // intentionally empty: removing the message is the whole job
+            }
+            mMousePositionModified = FALSE;
+        });
+
+    return TRUE;
 }
 
 BOOL LLWindowWin32::getCursorPosition(LLCoordWindow *position)
 {
+    ASSERT_MAIN_THREAD();
 	POINT cursor_point;
 
 	if (!mWindowHandle 
@@ -1853,21 +1953,35 @@ BOOL LLWindowWin32::getCursorPosition(LLCoordWindow *position)
 
 void LLWindowWin32::hideCursor()
 {
-	while (ShowCursor(FALSE) >= 0)
-	{
-		// nothing, wait for cursor to push down
-	}
+    ASSERT_MAIN_THREAD();
+
+    // Queued for mWindowThread: keep decrementing the ShowCursor() display
+    // counter until it reports a negative value.
+    mWindowThread->post([=]()
+        {
+            int display_count;
+            do { display_count = ShowCursor(FALSE); } while (display_count >= 0);
+        });
+
 	mCursorHidden = TRUE;
 	mHideCursorPermanent = TRUE;
 }
 
 void LLWindowWin32::showCursor()
 {
-	// makes sure the cursor shows up
-	while (ShowCursor(TRUE) < 0)
-	{
-		// do nothing, wait for cursor to pop out
-	}
+    LL_PROFILE_ZONE_SCOPED;
+
+    ASSERT_MAIN_THREAD();
+
+    // Queued for mWindowThread: keep incrementing the ShowCursor() display
+    // counter until it is no longer negative, so the cursor shows up.
+    mWindowThread->post([=]()
+        {
+            int display_count;
+            do { display_count = ShowCursor(TRUE); }
+            while (display_count < 0);
+        });
+
 	mCursorHidden = FALSE;
 	mHideCursorPermanent = FALSE;
 }
@@ -1969,6 +2083,8 @@ void LLWindowWin32::initCursors()
 
 void LLWindowWin32::updateCursor()
 {
+    ASSERT_MAIN_THREAD();
+    LL_PROFILE_ZONE_SCOPED;
 	if (mNextCursor == UI_CURSOR_ARROW
 		&& mBusyCount > 0)
 	{
@@ -1978,7 +2094,11 @@ void LLWindowWin32::updateCursor()
 	if( mCurrentCursor != mNextCursor )
 	{
 		mCurrentCursor = mNextCursor;
-		SetCursor( mCursor[mNextCursor] );
+        // Resolve the cursor handle on this thread; the lambda captures the
+        // handle by value, so a later change to mNextCursor cannot affect it.
+        // SetCursor() itself runs whenever mWindowThread executes the lambda.
+        auto nextCursor = mCursor[mNextCursor];
+        mWindowThread->post([=]() { SetCursor(nextCursor); });
 	}
 }
 
@@ -1994,13 +2114,8 @@ void LLWindowWin32::captureMouse()
 
 void LLWindowWin32::releaseMouse()
 {
-	// *NOTE:Mani ReleaseCapture will spawn new windows messages...
-	// which will in turn call our MainWindowProc. It therefore requires
-	// pausing *and more importantly resumption* of the mainlooptimeout...
-	// just like DispatchMessage below.
-	mCallbacks->handlePauseWatchdog(this);
+    LL_PROFILE_ZONE_SCOPED; // NOTE(review): ReleaseCapture() acts on the calling thread's capture; confirm this is right now that the window is created via mWindowThread
 	ReleaseCapture();
-	mCallbacks->handleResumeWatchdog(this);
 }
 
 
@@ -2009,1003 +2124,1129 @@ void LLWindowWin32::delayInputProcessing()
 	mInputProcessingPaused = TRUE;
 }
 
+
 void LLWindowWin32::gatherInput()
 {
-	MSG		msg;
-	int		msg_count = 0;
+    ASSERT_MAIN_THREAD();
+    LL_PROFILE_ZONE_SCOPED;
+    MSG msg;
 
-	while ((msg_count < MAX_MESSAGE_PER_UPDATE) && PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
-	{
-		mCallbacks->handlePingWatchdog(this, "Main:TranslateGatherInput");
-		TranslateMessage(&msg);
+#if LL_WINDOW_SINGLE_THREADED
+    int msg_count = 0;
 
-		// turn watchdog off in here to not fail if windows is doing something wacky
-		mCallbacks->handlePauseWatchdog(this);
-		DispatchMessage(&msg);
-		mCallbacks->handleResumeWatchdog(this);
-		msg_count++;
+    while ((msg_count < MAX_MESSAGE_PER_UPDATE))
+    {
+        LL_PROFILE_ZONE_NAMED("gi - loop");
+        ++msg_count;
+        {
+            LL_PROFILE_ZONE_NAMED("gi - PeekMessage");
+            if (!PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
+            {
+                break;
+            }
+        }
 
-		if ( mInputProcessingPaused )
-		{
-			break;
-		}
-		/* Attempted workaround for problem where typing fast and hitting
-		   return would result in only part of the text being sent. JC
+        {
+            LL_PROFILE_ZONE_NAMED("gi - translate");
+            TranslateMessage(&msg);
+        }
+
+        {
+            LL_PROFILE_ZONE_NAMED("gi - dispatch");
+            DispatchMessage(&msg);
+        }
 
-		BOOL key_posted = TranslateMessage(&msg);
-		DispatchMessage(&msg);
-		msg_count++;
+        if (mInputProcessingPaused)
+        {
+            break;
+        }
 
-		// If a key was translated, a WM_CHAR might have been posted to the end
-		// of the event queue.  We need it immediately.
-		if (key_posted && msg.message == WM_KEYDOWN)
-		{
-			if (PeekMessage(&msg, NULL, WM_CHAR, WM_CHAR, PM_REMOVE))
-			{
-				TranslateMessage(&msg);
-				DispatchMessage(&msg);
-				msg_count++;
-			}
-		}
-		*/
-		mCallbacks->handlePingWatchdog(this, "Main:AsyncCallbackGatherInput");
-		// For async host by name support.  Really hacky.
-		if (gAsyncMsgCallback && (LL_WM_HOST_RESOLVED == msg.message))
-		{
-			gAsyncMsgCallback(msg);
-		}
-	}
+        // For async host by name support.  Really hacky.
+        if (gAsyncMsgCallback && (LL_WM_HOST_RESOLVED == msg.message))
+        {
+            LL_PROFILE_ZONE_NAMED("gi - callback");
+            gAsyncMsgCallback(msg);
+        }
+    }
+#else // multi-threaded window implementation
+    {
+        if (mWindowThread->mFunctionQueue.size() > 0)
+        {
+            LL_PROFILE_ZONE_NAMED("gi - PostMessage");
+            if (mWindowHandle)
+            {   // Post a dummy user message so the window thread wakes up and runs its pending
+                // functions even if no real Windows events arrived this frame.
+                PostMessage(mWindowHandle, WM_USER + 0x0017, 0xB0B0, 0x1337);
+            }
+        }
+
+        while (mWindowThread->mMessageQueue.tryPopBack(msg))
+        {
+            LL_PROFILE_ZONE_NAMED("gi - message queue");
+            if (mInputProcessingPaused)
+            {
+                continue; // NOTE(review): unlike the single-threaded break, this skips handling of the already-popped message - confirm intended
+            }
+
+            // For async host by name support.  Really hacky.
+            if (gAsyncMsgCallback && (LL_WM_HOST_RESOLVED == msg.message))
+            {
+                LL_PROFILE_ZONE_NAMED("gi - callback");
+                gAsyncMsgCallback(msg);
+            }
+        }
+    }
+
+    {
+        LL_PROFILE_ZONE_NAMED("gi - function queue");
+        // Run every function currently pending in mFunctionQueue.
+        std::function<void()> curFunc;
+        while (mFunctionQueue.tryPopBack(curFunc))
+        {
+            curFunc();
+        }
+    }
+#endif // LL_WINDOW_SINGLE_THREADED
 
 	mInputProcessingPaused = FALSE;
 
 	updateCursor();
-
-	// clear this once we've processed all mouse messages that might have occurred after
-	// we slammed the mouse position
-	mMousePositionModified = FALSE;
 }
 
 static LLTrace::BlockTimerStatHandle FTM_KEYHANDLER("Handle Keyboard");
 static LLTrace::BlockTimerStatHandle FTM_MOUSEHANDLER("Handle Mouse");
 
+#if !LL_WINDOW_SINGLE_THREADED
+#define WINDOW_IMP_POST(x) window_imp->post([=]() { x; })
+#else // single-threaded build: evaluate the statement inline
+#define WINDOW_IMP_POST(x) x
+#endif // !LL_WINDOW_SINGLE_THREADED
+
 LRESULT CALLBACK LLWindowWin32::mainWindowProc(HWND h_wnd, UINT u_msg, WPARAM w_param, LPARAM l_param)
 {
-	// Ignore clicks not originated in the client area, i.e. mouse-up events not preceded with a WM_LBUTTONDOWN.
-	// This helps prevent avatar walking after maximizing the window by double-clicking the title bar.
-	static bool sHandleLeftMouseUp = true;
+    ASSERT_WINDOW_THREAD();
+    LL_PROFILE_ZONE_SCOPED;
 
-	// Ignore the double click received right after activating app.
-	// This is to avoid triggering double click teleport after returning focus (see MAINT-3786).
-	static bool sHandleDoubleClick = true;
+    // Ignore clicks not originated in the client area, i.e. mouse-up events not preceded with a WM_LBUTTONDOWN.
+    // This helps prevent avatar walking after maximizing the window by double-clicking the title bar.
+    static bool sHandleLeftMouseUp = true;
 
-	LLWindowWin32 *window_imp = (LLWindowWin32 *)GetWindowLongPtr( h_wnd, GWLP_USERDATA );
+    // Ignore the double click received right after activating app.
+    // This is to avoid triggering double click teleport after returning focus (see MAINT-3786).
+    static bool sHandleDoubleClick = true;
 
-	bool debug_window_proc = gDebugWindowProc || debugLoggingEnabled("Window");
+    LLWindowWin32* window_imp = (LLWindowWin32*)GetWindowLongPtr(h_wnd, GWLP_USERDATA);
 
+    bool debug_window_proc = false; // gDebugWindowProc || debugLoggingEnabled("Window");
 
-	if (NULL != window_imp)
-	{
-		window_imp->mCallbacks->handleResumeWatchdog(window_imp);
-		window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:StartWndProc");
-		// Has user provided their own window callback?
-		if (NULL != window_imp->mWndProc)
-		{
-			if (!window_imp->mWndProc(h_wnd, u_msg, w_param, l_param))
-			{
-				// user has handled window message
-				return 0;
-			}
-		}
-
-		window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:PreSwitchWndProc");
-		
-		// Juggle to make sure we can get negative positions for when
-		// mouse is outside window.
-		LLCoordWindow window_coord((S32)(S16)LOWORD(l_param), (S32)(S16)HIWORD(l_param));
-
-		// This doesn't work, as LOWORD returns unsigned short.
-		//LLCoordWindow window_coord(LOWORD(l_param), HIWORD(l_param));
-		LLCoordGL gl_coord;
-
-		// pass along extended flag in mask
-		MASK mask = (l_param>>16 & KF_EXTENDED) ? MASK_EXTENDED : 0x0;
-		BOOL eat_keystroke = TRUE;
-
-		switch(u_msg)
-		{
-			RECT	update_rect;
-			S32		update_width;
-			S32		update_height;
-
-		case WM_TIMER:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_TIMER");
-			window_imp->mCallbacks->handleTimerEvent(window_imp);
-			break;
-
-		case WM_DEVICECHANGE:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_DEVICECHANGE");
-			if (debug_window_proc)
-			{
-				LL_INFOS("Window") << "  WM_DEVICECHANGE: wParam=" << w_param 
-						<< "; lParam=" << l_param << LL_ENDL;
-			}
-			if (w_param == DBT_DEVNODES_CHANGED || w_param == DBT_DEVICEARRIVAL)
-			{
-				if (window_imp->mCallbacks->handleDeviceChange(window_imp))
-				{
-					return 0;
-				}
-			}
-			break;
-
-		case WM_PAINT:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_PAINT");
-			GetUpdateRect(window_imp->mWindowHandle, &update_rect, FALSE);
-			update_width = update_rect.right - update_rect.left + 1;
-			update_height = update_rect.bottom - update_rect.top + 1;
-			window_imp->mCallbacks->handlePaint(window_imp, update_rect.left, update_rect.top,
-				update_width, update_height);
-			break;
-		case WM_PARENTNOTIFY:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_PARENTNOTIFY");
-			u_msg = u_msg;
-			break;
-
-		case WM_SETCURSOR:
-			// This message is sent whenever the cursor is moved in a window.
-			// You need to set the appropriate cursor appearance.
-
-			// Only take control of cursor over client region of window
-			// This allows Windows(tm) to handle resize cursors, etc.
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_SETCURSOR");
-			if (LOWORD(l_param) == HTCLIENT)
-			{
-				SetCursor(window_imp->mCursor[ window_imp->mCurrentCursor] );
-				return 0;
-			}
-			break;
-
-		case WM_ENTERMENULOOP:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_ENTERMENULOOP");
-			window_imp->mCallbacks->handleWindowBlock(window_imp);
-			break;
-
-		case WM_EXITMENULOOP:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_EXITMENULOOP");
-			window_imp->mCallbacks->handleWindowUnblock(window_imp);
-			break;
-
-		case WM_ACTIVATEAPP:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_ACTIVATEAPP");
-			{
-				// This message should be sent whenever the app gains or loses focus.
-				BOOL activating = (BOOL) w_param;
-				BOOL minimized = window_imp->getMinimized();
-
-				if (debug_window_proc)
-				{
-					LL_INFOS("Window") << "WINDOWPROC ActivateApp "
-						<< " activating " << S32(activating)
-						<< " minimized " << S32(minimized)
-						<< " fullscreen " << S32(window_imp->mFullscreen)
-						<< LL_ENDL;
-				}
-
-				if (window_imp->mFullscreen)
-				{
-					// When we run fullscreen, restoring or minimizing the app needs 
-					// to switch the screen resolution
-					if (activating)
-					{
-						window_imp->setFullscreenResolution();
-						window_imp->restore();
-					}
-					else
-					{
-						window_imp->minimize();
-						window_imp->resetDisplayResolution();
-					}
-				}
-
-				if (!activating)
-				{
-					sHandleDoubleClick = false;
-				}
-
-				window_imp->mCallbacks->handleActivateApp(window_imp, activating);
-
-				break;
-			}
-
-		case WM_ACTIVATE:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_ACTIVATE");
-			{
-				// Can be one of WA_ACTIVE, WA_CLICKACTIVE, or WA_INACTIVE
-				BOOL activating = (LOWORD(w_param) != WA_INACTIVE);
-
-				BOOL minimized = BOOL(HIWORD(w_param));
-
-				if (!activating && LLWinImm::isAvailable() && window_imp->mPreeditor)
-				{
-					window_imp->interruptLanguageTextInput();
-				}
-
-				// JC - I'm not sure why, but if we don't report that we handled the 
-				// WM_ACTIVATE message, the WM_ACTIVATEAPP messages don't work 
-				// properly when we run fullscreen.
-				if (debug_window_proc)
-				{
-					LL_INFOS("Window") << "WINDOWPROC Activate "
-						<< " activating " << S32(activating) 
-						<< " minimized " << S32(minimized)
-						<< LL_ENDL;
-				}
-
-				// Don't handle this.
-				break;
-			}
-
-		case WM_QUERYOPEN:
-			// TODO: use this to return a nice icon
-			break;
-
-		case WM_SYSCOMMAND:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_SYSCOMMAND");
-			switch(w_param)
-			{
-			case SC_KEYMENU: 
-				// Disallow the ALT key from triggering the default system menu.
-				return 0;		
-
-			case SC_SCREENSAVE:
-			case SC_MONITORPOWER:
-				// eat screen save messages and prevent them!
-				return 0;
-			}
-			break;
-
-		case WM_CLOSE:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_CLOSE");
-			// Will the app allow the window to close?
-			if (window_imp->mCallbacks->handleCloseRequest(window_imp))
-			{
-				// Get the app to initiate cleanup.
-				window_imp->mCallbacks->handleQuit(window_imp);
-				// The app is responsible for calling destroyWindow when done with GL
-			}
-			return 0;
-
-		case WM_DESTROY:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_DESTROY");
-			if (window_imp->shouldPostQuit())
-			{
-				PostQuitMessage(0);  // Posts WM_QUIT with an exit code of 0
-			}
-			return 0;
-
-		case WM_COMMAND:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_COMMAND");
-			if (!HIWORD(w_param)) // this message is from a menu
-			{
-				window_imp->mCallbacks->handleMenuSelect(window_imp, LOWORD(w_param));
-			}
-			break;
-
-		case WM_SYSKEYDOWN:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_SYSKEYDOWN");
-			// allow system keys, such as ALT-F4 to be processed by Windows
-			eat_keystroke = FALSE;
-		case WM_KEYDOWN:
-			window_imp->mKeyCharCode = 0; // don't know until wm_char comes in next
-			window_imp->mKeyScanCode = ( l_param >> 16 ) & 0xff;
-			window_imp->mKeyVirtualKey = w_param;
-			window_imp->mRawMsg = u_msg;
-			window_imp->mRawWParam = w_param;
-			window_imp->mRawLParam = l_param;
-
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_KEYDOWN");
-			{
-				if (debug_window_proc)
-				{
-					LL_INFOS("Window") << "Debug WindowProc WM_KEYDOWN "
-						<< " key " << S32(w_param) 
-						<< LL_ENDL;
-				}
-				if(gKeyboard->handleKeyDown(w_param, mask) && eat_keystroke)
-				{
-					return 0;
-				}
-				// pass on to windows if we didn't handle it
-				break;
-			}
-		case WM_SYSKEYUP:
-			eat_keystroke = FALSE;
-		case WM_KEYUP:
-		{
-			window_imp->mKeyScanCode = ( l_param >> 16 ) & 0xff;
-			window_imp->mKeyVirtualKey = w_param;
-			window_imp->mRawMsg = u_msg;
-			window_imp->mRawWParam = w_param;
-			window_imp->mRawLParam = l_param;
-
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_KEYUP");
-			LL_RECORD_BLOCK_TIME(FTM_KEYHANDLER);
-
-			if (debug_window_proc)
-			{
-				LL_INFOS("Window") << "Debug WindowProc WM_KEYUP "
-					<< " key " << S32(w_param) 
-					<< LL_ENDL;
-			}
-			if (gKeyboard->handleKeyUp(w_param, mask) && eat_keystroke)
-			{
-				return 0;
-			}
+    if (NULL != window_imp)
+    {
+        // Has user provided their own window callback?
+        if (NULL != window_imp->mWndProc)
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WndProc");
+            if (!window_imp->mWndProc(h_wnd, u_msg, w_param, l_param))
+            {
+                // user has handled window message
+                return 0;
+            }
+        }
 
-			// pass on to windows
-			break;
-		}
-		case WM_IME_SETCONTEXT:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_IME_SETCONTEXT");
-			if (debug_window_proc)
-			{
-				LL_INFOS("Window") << "WM_IME_SETCONTEXT" << LL_ENDL;
-			}
-			if (LLWinImm::isAvailable() && window_imp->mPreeditor)
-			{
-				l_param &= ~ISC_SHOWUICOMPOSITIONWINDOW;
-				// Invoke DefWinProc with the modified LPARAM.
-			}
-			break;
+        // Juggle to make sure we can get negative positions for when
+        // mouse is outside window.
+        LLCoordWindow window_coord((S32)(S16)LOWORD(l_param), (S32)(S16)HIWORD(l_param));
 
-		case WM_IME_STARTCOMPOSITION:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_IME_STARTCOMPOSITION");
-			if (debug_window_proc)
-			{
-				LL_INFOS() << "WM_IME_STARTCOMPOSITION" << LL_ENDL;
-			}
-			if (LLWinImm::isAvailable() && window_imp->mPreeditor)
-			{
-				window_imp->handleStartCompositionMessage();
-				return 0;
-			}
-			break;
+        // This doesn't work, as LOWORD returns unsigned short.
+        //LLCoordWindow window_coord(LOWORD(l_param), HIWORD(l_param));
+        LLCoordGL gl_coord;
 
-		case WM_IME_ENDCOMPOSITION:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_IME_ENDCOMPOSITION");
-			if (debug_window_proc)
-			{
-				LL_INFOS() << "WM_IME_ENDCOMPOSITION" << LL_ENDL;
-			}
-			if (LLWinImm::isAvailable() && window_imp->mPreeditor)
-			{
-				return 0;
-			}
-			break;
+        // pass along extended flag in mask
+        MASK mask = (l_param >> 16 & KF_EXTENDED) ? MASK_EXTENDED : 0x0;
+        BOOL eat_keystroke = TRUE;
 
-		case WM_IME_COMPOSITION:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_IME_COMPOSITION");
-			if (debug_window_proc)
-			{
-				LL_INFOS() << "WM_IME_COMPOSITION" << LL_ENDL;
-			}
-			if (LLWinImm::isAvailable() && window_imp->mPreeditor)
-			{
-				window_imp->handleCompositionMessage(l_param);
-				return 0;
-			}
-			break;
+        switch (u_msg)
+        {
+            RECT	update_rect;
+            S32		update_width;
+            S32		update_height;
 
-		case WM_IME_REQUEST:
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_IME_REQUEST");
-			if (debug_window_proc)
-			{
-				LL_INFOS() << "WM_IME_REQUEST" << LL_ENDL;
-			}
-			if (LLWinImm::isAvailable() && window_imp->mPreeditor)
-			{
-				LRESULT result = 0;
-				if (window_imp->handleImeRequests(w_param, l_param, &result))
-				{
-					return result;
-				}
-			}
-			break;
+        case WM_TIMER:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_TIMER");
+            WINDOW_IMP_POST(window_imp->mCallbacks->handleTimerEvent(window_imp));
+            break;
+        }
 
-		case WM_CHAR:
-			window_imp->mKeyCharCode = w_param;
-			window_imp->mRawMsg = u_msg;
-			window_imp->mRawWParam = w_param;
-			window_imp->mRawLParam = l_param;
-
-			// Should really use WM_UNICHAR eventually, but it requires a specific Windows version and I need
-			// to figure out how that works. - Doug
-			//
-			// ... Well, I don't think so.
-			// How it works is explained in Win32 API document, but WM_UNICHAR didn't work
-			// as specified at least on Windows XP SP1 Japanese version.  I have never used
-			// it since then, and I'm not sure whether it has been fixed now, but I don't think
-			// it is worth trying.  The good old WM_CHAR works just fine even for supplementary
-			// characters.  We just need to take care of surrogate pairs sent as two WM_CHAR's
-			// by ourselves.  It is not that tough.  -- Alissa Sabre @ SL
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_CHAR");
-			if (debug_window_proc)
-			{
-				LL_INFOS("Window") << "Debug WindowProc WM_CHAR "
-					<< " key " << S32(w_param) 
-					<< LL_ENDL;
-			}
-			// Even if LLWindowCallbacks::handleUnicodeChar(llwchar, BOOL) returned FALSE,
-			// we *did* processed the event, so I believe we should not pass it to DefWindowProc...
-			window_imp->handleUnicodeUTF16((U16)w_param, gKeyboard->currentMask(FALSE));
-			return 0;
+        case WM_DEVICECHANGE:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_DEVICECHANGE");
+            if (debug_window_proc)
+            {
+                LL_INFOS("Window") << "  WM_DEVICECHANGE: wParam=" << w_param
+                    << "; lParam=" << l_param << LL_ENDL;
+            }
+            if (w_param == DBT_DEVNODES_CHANGED || w_param == DBT_DEVICEARRIVAL)
+            {
+                WINDOW_IMP_POST(window_imp->mCallbacks->handleDeviceChange(window_imp));
+                
+                return TRUE;
+            }
+            break;
+        }
 
-		case WM_NCLBUTTONDOWN:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_NCLBUTTONDOWN");
-				// A click in a non-client area, e.g. title bar or window border.
-				sHandleLeftMouseUp = false;
-				sHandleDoubleClick = true;
-			}
-			break;
+        case WM_PAINT:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_PAINT");
+            GetUpdateRect(window_imp->mWindowHandle, &update_rect, FALSE);
+            update_width = update_rect.right - update_rect.left + 1;
+            update_height = update_rect.bottom - update_rect.top + 1;
+
+            WINDOW_IMP_POST(window_imp->mCallbacks->handlePaint(window_imp, update_rect.left, update_rect.top,
+                update_width, update_height));
+            break;
+        }
+        case WM_PARENTNOTIFY:
+        {
+            break;
+        }
 
-		case WM_LBUTTONDOWN:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_LBUTTONDOWN");
-				LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
-				sHandleLeftMouseUp = true;
+        case WM_SETCURSOR:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_SETCURSOR");
+            // This message is sent whenever the cursor is moved in a window.
+            // You need to set the appropriate cursor appearance.
 
-				if (LLWinImm::isAvailable() && window_imp->mPreeditor)
-				{
-					window_imp->interruptLanguageTextInput();
-				}
+            // Only take control of cursor over client region of window
+            // This allows Windows(tm) to handle resize cursors, etc.
+            if (LOWORD(l_param) == HTCLIENT)
+            {
+                SetCursor(window_imp->mCursor[window_imp->mCurrentCursor]);
+                return 0;
+            }
+            break;
+        }
+        case WM_ENTERMENULOOP:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_ENTERMENULOOP");
+            WINDOW_IMP_POST(window_imp->mCallbacks->handleWindowBlock(window_imp));
+            break;
+        }
 
-				// Because we move the cursor position in the app, we need to query
-				// to find out where the cursor at the time the event is handled.
-				// If we don't do this, many clicks could get buffered up, and if the
-				// first click changes the cursor position, all subsequent clicks
-				// will occur at the wrong location.  JC
-				if (window_imp->mMousePositionModified)
-				{
-					LLCoordWindow cursor_coord_window;
-					window_imp->getCursorPosition(&cursor_coord_window);
-					gl_coord = cursor_coord_window.convert();
-				}
-				else
-				{
-					gl_coord = window_coord.convert();
-				}
-				MASK mask = gKeyboard->currentMask(TRUE);
-				// generate move event to update mouse coordinates
-				window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
-				if (window_imp->mCallbacks->handleMouseDown(window_imp, gl_coord, mask))
-				{
-					return 0;
-				}
-			}
-			break;
+        case WM_EXITMENULOOP:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_EXITMENULOOP");
+            WINDOW_IMP_POST(window_imp->mCallbacks->handleWindowUnblock(window_imp));
+            break;
+        }
 
-		case WM_LBUTTONDBLCLK:
-		//RN: ignore right button double clicks for now
-		//case WM_RBUTTONDBLCLK:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_LBUTTONDBLCLK");
+        case WM_ACTIVATEAPP:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_ACTIVATEAPP");
+            window_imp->post([=]()
+                {
+                    // This message should be sent whenever the app gains or loses focus.
+                    BOOL activating = (BOOL)w_param;
+                    BOOL minimized = window_imp->getMinimized();
+
+                    if (debug_window_proc)
+                    {
+                        LL_INFOS("Window") << "WINDOWPROC ActivateApp "
+                            << " activating " << S32(activating)
+                            << " minimized " << S32(minimized)
+                            << " fullscreen " << S32(window_imp->mFullscreen)
+                            << LL_ENDL;
+                    }
+
+                    if (window_imp->mFullscreen)
+                    {
+                        // When we run fullscreen, restoring or minimizing the app needs 
+                        // to switch the screen resolution
+                        if (activating)
+                        {
+                            window_imp->setFullscreenResolution();
+                            window_imp->restore();
+                        }
+                        else
+                        {
+                            window_imp->minimize();
+                            window_imp->resetDisplayResolution();
+                        }
+                    }
+
+                    if (!activating)
+                    {
+                        sHandleDoubleClick = false;
+                    }
+
+                    window_imp->mCallbacks->handleActivateApp(window_imp, activating);
+                });
+            break;
+        }
+        case WM_ACTIVATE:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_ACTIVATE");
+            window_imp->post([=]()
+                {
+                    // Can be one of WA_ACTIVE, WA_CLICKACTIVE, or WA_INACTIVE
+                    BOOL activating = (LOWORD(w_param) != WA_INACTIVE);
+
+                    BOOL minimized = BOOL(HIWORD(w_param));
+
+                    if (!activating && LLWinImm::isAvailable() && window_imp->mPreeditor)
+                    {
+                        window_imp->interruptLanguageTextInput();
+                    }
+
+                    // JC - I'm not sure why, but if we don't report that we handled the 
+                    // WM_ACTIVATE message, the WM_ACTIVATEAPP messages don't work 
+                    // properly when we run fullscreen.
+                    if (debug_window_proc)
+                    {
+                        LL_INFOS("Window") << "WINDOWPROC Activate "
+                            << " activating " << S32(activating)
+                            << " minimized " << S32(minimized)
+                            << LL_ENDL;
+                    }
+                });
+            
+            break;
+        }
 
-				if (!sHandleDoubleClick)
-				{
-					sHandleDoubleClick = true;
-					break;
-				}
+        case WM_QUERYOPEN:
+            // TODO: use this to return a nice icon
+            break;
 
-				// Because we move the cursor position in the app, we need to query
-				// to find out where the cursor at the time the event is handled.
-				// If we don't do this, many clicks could get buffered up, and if the
-				// first click changes the cursor position, all subsequent clicks
-				// will occur at the wrong location.  JC
-				if (window_imp->mMousePositionModified)
-				{
-					LLCoordWindow cursor_coord_window;
-					window_imp->getCursorPosition(&cursor_coord_window);
-					gl_coord = cursor_coord_window.convert();
-				}
-				else
-				{
-					gl_coord = window_coord.convert();
-				}
-				MASK mask = gKeyboard->currentMask(TRUE);
-				// generate move event to update mouse coordinates
-				window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
-				if (window_imp->mCallbacks->handleDoubleClick(window_imp, gl_coord, mask) )
-				{
-					return 0;
-				}
-			}
-			break;
+        case WM_SYSCOMMAND:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_SYSCOMMAND");
+            switch (w_param)
+            {
+            case SC_KEYMENU:
+                // Disallow the ALT key from triggering the default system menu.
+                return 0;
+
+            case SC_SCREENSAVE:
+            case SC_MONITORPOWER:
+                // eat screen save messages and prevent them!
+                return 0;
+            }
+            break;
+        }
+        case WM_CLOSE:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_CLOSE");
+            window_imp->post([=]()
+                {
+                    // Will the app allow the window to close?
+                    if (window_imp->mCallbacks->handleCloseRequest(window_imp))
+                    {
+                        // Get the app to initiate cleanup.
+                        window_imp->mCallbacks->handleQuit(window_imp);
+                        // The app is responsible for calling destroyWindow when done with GL
+                    }
+                });
+            return 0;
+        }
+        case WM_DESTROY:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_DESTROY");
+            if (window_imp->shouldPostQuit())
+            {
+                PostQuitMessage(0);  // Posts WM_QUIT with an exit code of 0
+            }
+            return 0;
+        }
+        case WM_COMMAND:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_COMMAND");
+            if (!HIWORD(w_param)) // this message is from a menu
+            {
+                WINDOW_IMP_POST(window_imp->mCallbacks->handleMenuSelect(window_imp, LOWORD(w_param)));
+            }
+            break;
+        }
+        case WM_SYSKEYDOWN:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_SYSKEYDOWN");
+            // allow system keys, such as ALT-F4 to be processed by Windows
+            eat_keystroke = FALSE;
+        }
+        case WM_KEYDOWN:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_KEYDOWN");
+            window_imp->post([=]()
+                {
+                    window_imp->mKeyCharCode = 0; // don't know until wm_char comes in next
+                    window_imp->mKeyScanCode = (l_param >> 16) & 0xff;
+                    window_imp->mKeyVirtualKey = w_param;
+                    window_imp->mRawMsg = u_msg;
+                    window_imp->mRawWParam = w_param;
+                    window_imp->mRawLParam = l_param;
+
+                    {
+                        if (debug_window_proc)
+                        {
+                            LL_INFOS("Window") << "Debug WindowProc WM_KEYDOWN "
+                                << " key " << S32(w_param)
+                                << LL_ENDL;
+                        }
+                        
+                        gKeyboard->handleKeyDown(w_param, mask);
+                    }
+                });
+                return eat_keystroke;
+        }
+        case WM_SYSKEYUP:
+            eat_keystroke = FALSE;
+        case WM_KEYUP:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_KEYUP");
+            window_imp->post([=]()
+            {
+                window_imp->mKeyScanCode = (l_param >> 16) & 0xff;
+                window_imp->mKeyVirtualKey = w_param;
+                window_imp->mRawMsg = u_msg;
+                window_imp->mRawWParam = w_param;
+                window_imp->mRawLParam = l_param;
+
+                {
+                    LL_RECORD_BLOCK_TIME(FTM_KEYHANDLER);
+
+                    if (debug_window_proc)
+                    {
+                        LL_INFOS("Window") << "Debug WindowProc WM_KEYUP "
+                            << " key " << S32(w_param)
+                            << LL_ENDL;
+                    }
+                    gKeyboard->handleKeyUp(w_param, mask);
+                }
+            });
+            return eat_keystroke;
+        }
+        case WM_IME_SETCONTEXT:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_IME_SETCONTEXT");
+            if (debug_window_proc)
+            {
+                LL_INFOS("Window") << "WM_IME_SETCONTEXT" << LL_ENDL;
+            }
+            if (LLWinImm::isAvailable() && window_imp->mPreeditor)
+            {
+                l_param &= ~ISC_SHOWUICOMPOSITIONWINDOW;
+                // Invoke DefWinProc with the modified LPARAM.
+            }
+            break;
+        }
+        case WM_IME_STARTCOMPOSITION:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_IME_STARTCOMPOSITION");
+            if (debug_window_proc)
+            {
+                LL_INFOS() << "WM_IME_STARTCOMPOSITION" << LL_ENDL;
+            }
+            if (LLWinImm::isAvailable() && window_imp->mPreeditor)
+            {
+                WINDOW_IMP_POST(window_imp->handleStartCompositionMessage());
+                return 0;
+            }
+            break;
+        }
+        case WM_IME_ENDCOMPOSITION:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_IME_ENDCOMPOSITION");
+            if (debug_window_proc)
+            {
+                LL_INFOS() << "WM_IME_ENDCOMPOSITION" << LL_ENDL;
+            }
+            if (LLWinImm::isAvailable() && window_imp->mPreeditor)
+            {
+                return 0;
+            }
+            break;
+        }
+        case WM_IME_COMPOSITION:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_IME_COMPOSITION");
+            if (debug_window_proc)
+            {
+                LL_INFOS() << "WM_IME_COMPOSITION" << LL_ENDL;
+            }
+            if (LLWinImm::isAvailable() && window_imp->mPreeditor)
+            {
+                WINDOW_IMP_POST(window_imp->handleCompositionMessage(l_param));
+                return 0;
+            }
+            break;
+        }
+        case WM_IME_REQUEST: // IME query: handled inline (not posted) because the LRESULT is returned to the sender
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_IME_REQUEST");
+            if (debug_window_proc)
+            {
+                LL_INFOS() << "WM_IME_REQUEST" << LL_ENDL;
+            }
+            if (LLWinImm::isAvailable() && window_imp->mPreeditor)
+            {
+                LRESULT result = 0; // defined fallback; NOTE(review): handleImeRequests() may not write it for every request - confirm
+                window_imp->handleImeRequests(w_param, l_param, &result);
+                return result;
+            }
+            break;
+        }
+        case WM_CHAR:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_CHAR");
+            window_imp->post([=]()
+                {
+                    window_imp->mKeyCharCode = w_param;
+                    window_imp->mRawMsg = u_msg;
+                    window_imp->mRawWParam = w_param;
+                    window_imp->mRawLParam = l_param;
+
+                    // Should really use WM_UNICHAR eventually, but it requires a specific Windows version and I need
+                    // to figure out how that works. - Doug
+                    //
+                    // ... Well, I don't think so.
+                    // How it works is explained in Win32 API document, but WM_UNICHAR didn't work
+                    // as specified at least on Windows XP SP1 Japanese version.  I have never used
+                    // it since then, and I'm not sure whether it has been fixed now, but I don't think
+                    // it is worth trying.  The good old WM_CHAR works just fine even for supplementary
+                    // characters.  We just need to take care of surrogate pairs sent as two WM_CHAR's
+                    // by ourselves.  It is not that tough.  -- Alissa Sabre @ SL
+                    if (debug_window_proc)
+                    {
+                        LL_INFOS("Window") << "Debug WindowProc WM_CHAR "
+                            << " key " << S32(w_param)
+                            << LL_ENDL;
+                    }
+                    // Even if LLWindowCallbacks::handleUnicodeChar(llwchar, BOOL) returned FALSE,
+                    // we *did* process the event, so I believe we should not pass it to DefWindowProc...
+                    window_imp->handleUnicodeUTF16((U16)w_param, gKeyboard->currentMask(FALSE));
+                });
+            return 0;
+        }
+        case WM_NCLBUTTONDOWN:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_NCLBUTTONDOWN");
+            {
+                // A click in a non-client area, e.g. title bar or window border.
+                window_imp->post([=]()
+                    {
+                        sHandleLeftMouseUp = false;
+                        sHandleDoubleClick = true;
+                    });
+            }
+            break;
+        }
+        case WM_LBUTTONDOWN:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_LBUTTONDOWN");
+            {
+                LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
+                window_imp->post([=]()
+                    {
+                        auto glc = gl_coord;
+                        sHandleLeftMouseUp = true;
+
+                        if (LLWinImm::isAvailable() && window_imp->mPreeditor)
+                        {
+                            window_imp->interruptLanguageTextInput();
+                        }
+
+                        // Because we move the cursor position in the app, we need to query
+                        // to find out where the cursor at the time the event is handled.
+                        // If we don't do this, many clicks could get buffered up, and if the
+                        // first click changes the cursor position, all subsequent clicks
+                        // will occur at the wrong location.  JC
+                        if (window_imp->mMousePositionModified)
+                        {
+                            LLCoordWindow cursor_coord_window;
+                            window_imp->getCursorPosition(&cursor_coord_window);
+                            glc = cursor_coord_window.convert();
+                        }
+                        else
+                        {
+                            glc = window_coord.convert();
+                        }
+                        MASK mask = gKeyboard->currentMask(TRUE);
+                        // generate move event to update mouse coordinates
+                        window_imp->mCallbacks->handleMouseMove(window_imp, glc, mask);
+                        window_imp->mCallbacks->handleMouseDown(window_imp, glc, mask);
+                    });
+
+                return 0;
+            }
+            break;
+        }
 
-		case WM_LBUTTONUP:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_LBUTTONUP");
-				LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
+        case WM_LBUTTONDBLCLK:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_LBUTTONDBLCLK");
+            //RN: ignore right button double clicks for now
+            //case WM_RBUTTONDBLCLK:
+            if (!sHandleDoubleClick)
+            {
+                sHandleDoubleClick = true;
+                return 0;
+            }
 
-				if (!sHandleLeftMouseUp)
-				{
-					sHandleLeftMouseUp = true;
-					break;
-				}
-				sHandleDoubleClick = true;
-
-				//if (gDebugClicks)
-				//{
-				//	LL_INFOS("Window") << "WndProc left button up" << LL_ENDL;
-				//}
-				// Because we move the cursor position in the app, we need to query
-				// to find out where the cursor at the time the event is handled.
-				// If we don't do this, many clicks could get buffered up, and if the
-				// first click changes the cursor position, all subsequent clicks
-				// will occur at the wrong location.  JC
-				if (window_imp->mMousePositionModified)
-				{
-					LLCoordWindow cursor_coord_window;
-					window_imp->getCursorPosition(&cursor_coord_window);
-					gl_coord = cursor_coord_window.convert();
-				}
-				else
-				{
-					gl_coord = window_coord.convert();
-				}
-				MASK mask = gKeyboard->currentMask(TRUE);
-				// generate move event to update mouse coordinates
-				window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
-				if (window_imp->mCallbacks->handleMouseUp(window_imp, gl_coord, mask))
-				{
-					return 0;
-				}
-			}
-			break;
+            // Because we move the cursor position in the app, we need to query
+            // to find out where the cursor at the time the event is handled.
+            // If we don't do this, many clicks could get buffered up, and if the
+            // first click changes the cursor position, all subsequent clicks
+            // will occur at the wrong location.  JC
+            if (window_imp->mMousePositionModified)
+            {
+                LLCoordWindow cursor_coord_window;
+                window_imp->getCursorPosition(&cursor_coord_window);
+                gl_coord = cursor_coord_window.convert();
+            }
+            else
+            {
+                gl_coord = window_coord.convert();
+            }
+            MASK mask = gKeyboard->currentMask(TRUE);
+            // generate move event to update mouse coordinates
+            window_imp->post([=]()
+                {
+                    window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
+                    window_imp->mCallbacks->handleDoubleClick(window_imp, gl_coord, mask);
+                });
+            return 0;
+        }
+        case WM_LBUTTONUP:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_LBUTTONUP");
+            {
+                LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
+
+                if (!sHandleLeftMouseUp)
+                {
+                    sHandleLeftMouseUp = true;
+                    return 0;
+                }
+                sHandleDoubleClick = true;
+                window_imp->post([=]()
+                    {
+                        auto glc = gl_coord;
+
+                        //if (gDebugClicks)
+                        //{
+                        //	LL_INFOS("Window") << "WndProc left button up" << LL_ENDL;
+                        //}
+                        // Because we move the cursor position in the app, we need to query
+                        // to find out where the cursor at the time the event is handled.
+                        // If we don't do this, many clicks could get buffered up, and if the
+                        // first click changes the cursor position, all subsequent clicks
+                        // will occur at the wrong location.  JC
+                        if (window_imp->mMousePositionModified)
+                        {
+                            LLCoordWindow cursor_coord_window;
+                            window_imp->getCursorPosition(&cursor_coord_window);
+                            glc = cursor_coord_window.convert();
+                        }
+                        else
+                        {
+                            glc = window_coord.convert();
+                        }
+                        MASK mask = gKeyboard->currentMask(TRUE);
+                        // generate move event to update mouse coordinates
+                        window_imp->mCallbacks->handleMouseMove(window_imp, glc, mask);
+                        window_imp->mCallbacks->handleMouseUp(window_imp, glc, mask);
+                    });
+            }
+            return 0;
+        }
+        case WM_RBUTTONDBLCLK:
+        case WM_RBUTTONDOWN:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_RBUTTONDOWN");
+            {
+                LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
+                if (LLWinImm::isAvailable() && window_imp->mPreeditor)
+                {
+                    WINDOW_IMP_POST(window_imp->interruptLanguageTextInput());
+                }
+
+                // Because we move the cursor position in the llviewerapp, we need to query
+                // to find out where the cursor at the time the event is handled.
+                // If we don't do this, many clicks could get buffered up, and if the
+                // first click changes the cursor position, all subsequent clicks
+                // will occur at the wrong location.  JC
+                if (window_imp->mMousePositionModified)
+                {
+                    LLCoordWindow cursor_coord_window;
+                    window_imp->getCursorPosition(&cursor_coord_window);
+                    gl_coord = cursor_coord_window.convert();
+                }
+                else
+                {
+                    gl_coord = window_coord.convert();
+                }
+                MASK mask = gKeyboard->currentMask(TRUE);
+                // generate move event to update mouse coordinates
+                window_imp->post([=]()
+                    {
+                        window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
+                        window_imp->mCallbacks->handleRightMouseDown(window_imp, gl_coord, mask);
+                    });
+            }
+            return 0;
+        }
+        break;
 
-		case WM_RBUTTONDBLCLK:
-		case WM_RBUTTONDOWN:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_RBUTTONDOWN");
-				LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
-				if (LLWinImm::isAvailable() && window_imp->mPreeditor)
-				{
-					window_imp->interruptLanguageTextInput();
-				}
+        case WM_RBUTTONUP:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_RBUTTONUP");
+            {
+                LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
+                // Because we move the cursor position in the app, we need to query
+                // to find out where the cursor at the time the event is handled.
+                // If we don't do this, many clicks could get buffered up, and if the
+                // first click changes the cursor position, all subsequent clicks
+                // will occur at the wrong location.  JC
+                if (window_imp->mMousePositionModified)
+                {
+                    LLCoordWindow cursor_coord_window;
+                    window_imp->getCursorPosition(&cursor_coord_window);
+                    gl_coord = cursor_coord_window.convert();
+                }
+                else
+                {
+                    gl_coord = window_coord.convert();
+                }
+                MASK mask = gKeyboard->currentMask(TRUE);
+                // generate move event to update mouse coordinates
+                window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
+                if (window_imp->mCallbacks->handleRightMouseUp(window_imp, gl_coord, mask))
+                {
+                    return 0;
+                }
+            }
+        }
+        break;
 
-				// Because we move the cursor position in the llviewerapp, we need to query
-				// to find out where the cursor at the time the event is handled.
-				// If we don't do this, many clicks could get buffered up, and if the
-				// first click changes the cursor position, all subsequent clicks
-				// will occur at the wrong location.  JC
-				if (window_imp->mMousePositionModified)
-				{
-					LLCoordWindow cursor_coord_window;
-					window_imp->getCursorPosition(&cursor_coord_window);
-					gl_coord = cursor_coord_window.convert();
-				}
-				else
-				{
-					gl_coord = window_coord.convert();
-				}
-				MASK mask = gKeyboard->currentMask(TRUE);
-				// generate move event to update mouse coordinates
-				window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
-				if (window_imp->mCallbacks->handleRightMouseDown(window_imp, gl_coord, mask))
-				{
-					return 0;
-				}
-			}
-			break;
+        case WM_MBUTTONDOWN:
+            //		case WM_MBUTTONDBLCLK:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_MBUTTONDOWN");
+            {
+                LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
+                if (LLWinImm::isAvailable() && window_imp->mPreeditor)
+                {
+                    window_imp->interruptLanguageTextInput();
+                }
+
+                // Because we move the cursor position in the llviewer app, we need to query
+                // to find out where the cursor at the time the event is handled.
+                // If we don't do this, many clicks could get buffered up, and if the
+                // first click changes the cursor position, all subsequent clicks
+                // will occur at the wrong location.  JC
+                if (window_imp->mMousePositionModified)
+                {
+                    LLCoordWindow cursor_coord_window;
+                    window_imp->getCursorPosition(&cursor_coord_window);
+                    gl_coord = cursor_coord_window.convert();
+                }
+                else
+                {
+                    gl_coord = window_coord.convert();
+                }
+                MASK mask = gKeyboard->currentMask(TRUE);
+                // generate move event to update mouse coordinates
+                window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
+                if (window_imp->mCallbacks->handleMiddleMouseDown(window_imp, gl_coord, mask))
+                {
+                    return 0;
+                }
+            }
+        }
+        break;
 
-		case WM_RBUTTONUP:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_RBUTTONUP");
-				LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
-				// Because we move the cursor position in the app, we need to query
-				// to find out where the cursor at the time the event is handled.
-				// If we don't do this, many clicks could get buffered up, and if the
-				// first click changes the cursor position, all subsequent clicks
-				// will occur at the wrong location.  JC
-				if (window_imp->mMousePositionModified)
-				{
-					LLCoordWindow cursor_coord_window;
-					window_imp->getCursorPosition(&cursor_coord_window);
-					gl_coord = cursor_coord_window.convert();
-				}
-				else
-				{
-					gl_coord = window_coord.convert();
-				}
-				MASK mask = gKeyboard->currentMask(TRUE);
-				// generate move event to update mouse coordinates
-				window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
-				if (window_imp->mCallbacks->handleRightMouseUp(window_imp, gl_coord, mask))
-				{
-					return 0;
-				}
-			}
-			break;
+        case WM_MBUTTONUP:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_MBUTTONUP");
+            {
+                LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
+                // Because we move the cursor position in the llviewer app, we need to query
+                // to find out where the cursor at the time the event is handled.
+                // If we don't do this, many clicks could get buffered up, and if the
+                // first click changes the cursor position, all subsequent clicks
+                // will occur at the wrong location.  JC
+                if (window_imp->mMousePositionModified)
+                {
+                    LLCoordWindow cursor_coord_window;
+                    window_imp->getCursorPosition(&cursor_coord_window);
+                    gl_coord = cursor_coord_window.convert();
+                }
+                else
+                {
+                    gl_coord = window_coord.convert();
+                }
+                MASK mask = gKeyboard->currentMask(TRUE);
+                // generate move event to update mouse coordinates
+                window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
+                if (window_imp->mCallbacks->handleMiddleMouseUp(window_imp, gl_coord, mask))
+                {
+                    return 0;
+                }
+            }
+        }
+        break;
+        case WM_XBUTTONDOWN:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_XBUTTONDOWN");
+            {
+                LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
+                S32 button = GET_XBUTTON_WPARAM(w_param);
+                if (LLWinImm::isAvailable() && window_imp->mPreeditor)
+                {
+                    window_imp->interruptLanguageTextInput();
+                }
+
+                // Because we move the cursor position in the llviewer app, we need to query
+                // to find out where the cursor at the time the event is handled.
+                // If we don't do this, many clicks could get buffered up, and if the
+                // first click changes the cursor position, all subsequent clicks
+                // will occur at the wrong location.  JC
+                if (window_imp->mMousePositionModified)
+                {
+                    LLCoordWindow cursor_coord_window;
+                    window_imp->getCursorPosition(&cursor_coord_window);
+                    gl_coord = cursor_coord_window.convert();
+                }
+                else
+                {
+                    gl_coord = window_coord.convert();
+                }
+                MASK mask = gKeyboard->currentMask(TRUE);
+                // generate move event to update mouse coordinates
+                window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
+                // Windows uses numbers 1 and 2 for buttons, remap to 4, 5
+                if (window_imp->mCallbacks->handleOtherMouseDown(window_imp, gl_coord, mask, button + 3))
+                {
+                    return 0;
+                }
+            }
+        }
+        break;
 
-		case WM_MBUTTONDOWN:
-//		case WM_MBUTTONDBLCLK:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_MBUTTONDOWN");
-				LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
-				if (LLWinImm::isAvailable() && window_imp->mPreeditor)
-				{
-					window_imp->interruptLanguageTextInput();
-				}
+        case WM_XBUTTONUP:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_XBUTTONUP");
+            {
+                LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
+                S32 button = GET_XBUTTON_WPARAM(w_param);
+                // Because we move the cursor position in the llviewer app, we need to query
+                // to find out where the cursor at the time the event is handled.
+                // If we don't do this, many clicks could get buffered up, and if the
+                // first click changes the cursor position, all subsequent clicks
+                // will occur at the wrong location.  JC
+                if (window_imp->mMousePositionModified)
+                {
+                    LLCoordWindow cursor_coord_window;
+                    window_imp->getCursorPosition(&cursor_coord_window);
+                    gl_coord = cursor_coord_window.convert();
+                }
+                else
+                {
+                    gl_coord = window_coord.convert();
+                }
+                MASK mask = gKeyboard->currentMask(TRUE);
+                // generate move event to update mouse coordinates
+                window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
+                // Windows uses numbers 1 and 2 for buttons, remap to 4, 5
+                if (window_imp->mCallbacks->handleOtherMouseUp(window_imp, gl_coord, mask, button + 3))
+                {
+                    return 0;
+                }
+            }
+        }
+        break;
 
-				// Because we move the cursor position in tllviewerhe app, we need to query
-				// to find out where the cursor at the time the event is handled.
-				// If we don't do this, many clicks could get buffered up, and if the
-				// first click changes the cursor position, all subsequent clicks
-				// will occur at the wrong location.  JC
-				if (window_imp->mMousePositionModified)
-				{
-					LLCoordWindow cursor_coord_window;
-					window_imp->getCursorPosition(&cursor_coord_window);
-					gl_coord = cursor_coord_window.convert();
-				}
-				else
-				{
-					gl_coord = window_coord.convert();
-				}
-				MASK mask = gKeyboard->currentMask(TRUE);
-				// generate move event to update mouse coordinates
-				window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
-				if (window_imp->mCallbacks->handleMiddleMouseDown(window_imp, gl_coord, mask))
-				{
-					return 0;
-				}
-			}
-			break;
+        case WM_MOUSEWHEEL:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_MOUSEWHEEL");
+            static short z_delta = 0;
 
-		case WM_MBUTTONUP:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_MBUTTONUP");
-				LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
-				// Because we move the cursor position in the llviewer app, we need to query
-				// to find out where the cursor at the time the event is handled.
-				// If we don't do this, many clicks could get buffered up, and if the
-				// first click changes the cursor position, all subsequent clicks
-				// will occur at the wrong location.  JC
-				if (window_imp->mMousePositionModified)
-				{
-					LLCoordWindow cursor_coord_window;
-					window_imp->getCursorPosition(&cursor_coord_window);
-					gl_coord = cursor_coord_window.convert();
-				}
-				else
-				{
-					gl_coord = window_coord.convert();
-				}
-				MASK mask = gKeyboard->currentMask(TRUE);
-				// generate move event to update mouse coordinates
-				window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
-				if (window_imp->mCallbacks->handleMiddleMouseUp(window_imp, gl_coord, mask))
-				{
-					return 0;
-				}
-			}
-			break;
-		case WM_XBUTTONDOWN:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_MBUTTONDOWN");
-				LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
-				S32 button = GET_XBUTTON_WPARAM(w_param);
-				if (LLWinImm::isAvailable() && window_imp->mPreeditor)
-				{
-					window_imp->interruptLanguageTextInput();
-				}
+            RECT	client_rect;
 
-				// Because we move the cursor position in tllviewerhe app, we need to query
-				// to find out where the cursor at the time the event is handled.
-				// If we don't do this, many clicks could get buffered up, and if the
-				// first click changes the cursor position, all subsequent clicks
-				// will occur at the wrong location.  JC
-				if (window_imp->mMousePositionModified)
-				{
-					LLCoordWindow cursor_coord_window;
-					window_imp->getCursorPosition(&cursor_coord_window);
-					gl_coord = cursor_coord_window.convert();
-				}
-				else
-				{
-					gl_coord = window_coord.convert();
-				}
-				MASK mask = gKeyboard->currentMask(TRUE);
-				// generate move event to update mouse coordinates
-				window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
-				// Windows uses numbers 1 and 2 for buttons, remap to 4, 5
-				if (window_imp->mCallbacks->handleOtherMouseDown(window_imp, gl_coord, mask, button + 3))
-				{
-					return 0;
-				}
-			}
-			break;
+            // eat scroll events that occur outside our window, since we use mouse position to direct scroll
+            // instead of keyboard focus
+            // NOTE: mouse_coord is in *window* coordinates for scroll events
+            POINT mouse_coord = { (S32)(S16)LOWORD(l_param), (S32)(S16)HIWORD(l_param) };
 
-		case WM_XBUTTONUP:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_MBUTTONUP");
-				LL_RECORD_BLOCK_TIME(FTM_MOUSEHANDLER);
-				S32 button = GET_XBUTTON_WPARAM(w_param);
-				// Because we move the cursor position in the llviewer app, we need to query
-				// to find out where the cursor at the time the event is handled.
-				// If we don't do this, many clicks could get buffered up, and if the
-				// first click changes the cursor position, all subsequent clicks
-				// will occur at the wrong location.  JC
-				if (window_imp->mMousePositionModified)
-				{
-					LLCoordWindow cursor_coord_window;
-					window_imp->getCursorPosition(&cursor_coord_window);
-					gl_coord = cursor_coord_window.convert();
-				}
-				else
-				{
-					gl_coord = window_coord.convert();
-				}
-				MASK mask = gKeyboard->currentMask(TRUE);
-				// generate move event to update mouse coordinates
-				window_imp->mCallbacks->handleMouseMove(window_imp, gl_coord, mask);
-				// Windows uses numbers 1 and 2 for buttons, remap to 4, 5
-				if (window_imp->mCallbacks->handleOtherMouseUp(window_imp, gl_coord, mask, button + 3))
-				{
-					return 0;
-				}
-			}
-			break;
+            if (ScreenToClient(window_imp->mWindowHandle, &mouse_coord)
+                && GetClientRect(window_imp->mWindowHandle, &client_rect))
+            {
+                // we have a valid mouse point and client rect
+                if (mouse_coord.x < client_rect.left || client_rect.right < mouse_coord.x
+                    || mouse_coord.y < client_rect.top || client_rect.bottom < mouse_coord.y)
+                {
+                    // mouse is outside of client rect, so don't do anything
+                    return 0;
+                }
+            }
 
-		case WM_MOUSEWHEEL:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_MOUSEWHEEL");
-				static short z_delta = 0;
+            S16 incoming_z_delta = HIWORD(w_param);
+            z_delta += incoming_z_delta;
+            // cout << "z_delta " << z_delta << endl;
+
+            // current mouse wheels report changes in increments of zDelta (+120, -120)
+            // Future, higher resolution mouse wheels may report smaller deltas.
+            // So we sum the deltas and only act when we've exceeded WHEEL_DELTA
+            //
+            // If the user rapidly spins the wheel, we can get messages with
+            // large deltas, like 480 or so.  Thus we need to scroll more quickly.
+            if (z_delta <= -WHEEL_DELTA || WHEEL_DELTA <= z_delta)
+            {
+                window_imp->mCallbacks->handleScrollWheel(window_imp, -z_delta / WHEEL_DELTA);
+                z_delta = 0;
+            }
+            return 0;
+        }
+        /*
+        // TODO: add this after resolving _WIN32_WINNT issue
+        case WM_MOUSELEAVE:
+        {
+        window_imp->mCallbacks->handleMouseLeave(window_imp);
+
+        //				TRACKMOUSEEVENT track_mouse_event;
+        //				track_mouse_event.cbSize = sizeof( TRACKMOUSEEVENT );
+        //				track_mouse_event.dwFlags = TME_LEAVE;
+        //				track_mouse_event.hwndTrack = h_wnd;
+        //				track_mouse_event.dwHoverTime = HOVER_DEFAULT;
+        //				TrackMouseEvent( &track_mouse_event );
+        return 0;
+        }
+        */
+        case WM_MOUSEHWHEEL:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_MOUSEHWHEEL");
+            static short h_delta = 0;
 
-				RECT	client_rect;
+            RECT	client_rect;
 
-				// eat scroll events that occur outside our window, since we use mouse position to direct scroll
-				// instead of keyboard focus
-				// NOTE: mouse_coord is in *window* coordinates for scroll events
-				POINT mouse_coord = {(S32)(S16)LOWORD(l_param), (S32)(S16)HIWORD(l_param)};
+            // eat scroll events that occur outside our window, since we use mouse position to direct scroll
+            // instead of keyboard focus
+            // NOTE: mouse_coord is in *window* coordinates for scroll events
+            POINT mouse_coord = { (S32)(S16)LOWORD(l_param), (S32)(S16)HIWORD(l_param) };
 
-				if (ScreenToClient(window_imp->mWindowHandle, &mouse_coord)
-					&& GetClientRect(window_imp->mWindowHandle, &client_rect))
-				{
-					// we have a valid mouse point and client rect
-					if (mouse_coord.x < client_rect.left || client_rect.right < mouse_coord.x
-						|| mouse_coord.y < client_rect.top || client_rect.bottom < mouse_coord.y)
-					{
-						// mouse is outside of client rect, so don't do anything
-						return 0;
-					}
-				}
+            if (ScreenToClient(window_imp->mWindowHandle, &mouse_coord)
+                && GetClientRect(window_imp->mWindowHandle, &client_rect))
+            {
+                // we have a valid mouse point and client rect
+                if (mouse_coord.x < client_rect.left || client_rect.right < mouse_coord.x
+                    || mouse_coord.y < client_rect.top || client_rect.bottom < mouse_coord.y)
+                {
+                    // mouse is outside of client rect, so don't do anything
+                    return 0;
+                }
+            }
 
-				S16 incoming_z_delta = HIWORD(w_param);
-				z_delta += incoming_z_delta;
-				// cout << "z_delta " << z_delta << endl;
-
-				// current mouse wheels report changes in increments of zDelta (+120, -120)
-				// Future, higher resolution mouse wheels may report smaller deltas.
-				// So we sum the deltas and only act when we've exceeded WHEEL_DELTA
-				//
-				// If the user rapidly spins the wheel, we can get messages with
-				// large deltas, like 480 or so.  Thus we need to scroll more quickly.
-				if (z_delta <= -WHEEL_DELTA || WHEEL_DELTA <= z_delta)
-				{
-					window_imp->mCallbacks->handleScrollWheel(window_imp, -z_delta / WHEEL_DELTA);
-					z_delta = 0;
-				}
-				return 0;
-			}
-			/*
-			// TODO: add this after resolving _WIN32_WINNT issue
-			case WM_MOUSELEAVE:
-			{
-			window_imp->mCallbacks->handleMouseLeave(window_imp);
-
-			//				TRACKMOUSEEVENT track_mouse_event;
-			//				track_mouse_event.cbSize = sizeof( TRACKMOUSEEVENT );
-			//				track_mouse_event.dwFlags = TME_LEAVE;
-			//				track_mouse_event.hwndTrack = h_wnd;
-			//				track_mouse_event.dwHoverTime = HOVER_DEFAULT;
-			//				TrackMouseEvent( &track_mouse_event ); 
-			return 0;
-			}
-			*/
-		case WM_MOUSEHWHEEL:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_MOUSEHWHEEL");
-				static short h_delta = 0;
+            S16 incoming_h_delta = HIWORD(w_param);
+            h_delta += incoming_h_delta;
 
-				RECT	client_rect;
+            // Fast spins deliver large deltas (480 or so), so scroll by whole clicks.
+            if (h_delta <= -WHEEL_DELTA || WHEEL_DELTA <= h_delta)
+            {
+                const S32 clicks = h_delta / WHEEL_DELTA; // snapshot: static h_delta is not captured by value, and is reset below
+                WINDOW_IMP_POST(window_imp->mCallbacks->handleScrollHWheel(window_imp, clicks));
+                h_delta = 0;
+            }
+            return 0;
+        }
+        // Handle mouse movement within the window
+        case WM_MOUSEMOVE:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_MOUSEMOVE");
+            if (!window_imp->mMousePositionModified)
+            {
+                MASK mask = gKeyboard->currentMask(TRUE);
+                WINDOW_IMP_POST(window_imp->mCallbacks->handleMouseMove(window_imp, window_coord.convert(), mask));
+            }
+            return 0;
+        }
 
-				// eat scroll events that occur outside our window, since we use mouse position to direct scroll
-				// instead of keyboard focus
-				// NOTE: mouse_coord is in *window* coordinates for scroll events
-				POINT mouse_coord = {(S32)(S16)LOWORD(l_param), (S32)(S16)HIWORD(l_param)};
+        case WM_GETMINMAXINFO:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_GETMINMAXINFO");
+            LPMINMAXINFO min_max = (LPMINMAXINFO)l_param;
+            min_max->ptMinTrackSize.x = window_imp->mMinWindowWidth;
+            min_max->ptMinTrackSize.y = window_imp->mMinWindowHeight;
+            return 0;
+        }
 
-				if (ScreenToClient(window_imp->mWindowHandle, &mouse_coord)
-					&& GetClientRect(window_imp->mWindowHandle, &client_rect))
-				{
-					// we have a valid mouse point and client rect
-					if (mouse_coord.x < client_rect.left || client_rect.right < mouse_coord.x
-						|| mouse_coord.y < client_rect.top || client_rect.bottom < mouse_coord.y)
-					{
-						// mouse is outside of client rect, so don't do anything
-						return 0;
-					}
-				}
+        case WM_SIZE:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_SIZE");
+            S32 width = S32(LOWORD(l_param));
+            S32 height = S32(HIWORD(l_param));
 
-				S16 incoming_h_delta = HIWORD(w_param);
-				h_delta += incoming_h_delta;
+            if (debug_window_proc)
+            {
+                BOOL maximized = (w_param == SIZE_MAXIMIZED);
+                BOOL restored = (w_param == SIZE_RESTORED);
+                BOOL minimized = (w_param == SIZE_MINIMIZED);
+
+                LL_INFOS("Window") << "WINDOWPROC Size "
+                    << width << "x" << height
+                    << " max " << S32(maximized)
+                    << " min " << S32(minimized)
+                    << " rest " << S32(restored)
+                    << LL_ENDL;
+            }
 
-				// If the user rapidly spins the wheel, we can get messages with
-				// large deltas, like 480 or so.  Thus we need to scroll more quickly.
-				if (h_delta <= -WHEEL_DELTA || WHEEL_DELTA <= h_delta)
-				{
-					window_imp->mCallbacks->handleScrollHWheel(window_imp, h_delta / WHEEL_DELTA);
-					h_delta = 0;
-				}
-				return 0;
-			}
-			// Handle mouse movement within the window
-		case WM_MOUSEMOVE:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_MOUSEMOVE");
-				MASK mask = gKeyboard->currentMask(TRUE);
-				window_imp->mCallbacks->handleMouseMove(window_imp, window_coord.convert(), mask);
-				return 0;
-			}
+            // There's an odd behavior with WM_SIZE that I would call a bug. If 
+            // the window is maximized, and you call MoveWindow() with a size smaller
+            // than a maximized window, it ends up sending WM_SIZE with w_param set 
+            // to SIZE_MAXIMIZED -- which isn't true. So the logic below doesn't work.
+            // (SL-44655). Fixed it by calling ShowWindow(SW_RESTORE) first (see 
+            // LLWindowWin32::moveWindow in this file). 
 
-		case WM_GETMINMAXINFO:
-			{
-				LPMINMAXINFO min_max = (LPMINMAXINFO)l_param;
-				min_max->ptMinTrackSize.x = window_imp->mMinWindowWidth;
-				min_max->ptMinTrackSize.y = window_imp->mMinWindowHeight;
-				return 0;
-			}
+            // If we are now restored, but we weren't before, this
+            // means that the window was un-minimized.
+            if (w_param == SIZE_RESTORED && window_imp->mLastSizeWParam != SIZE_RESTORED)
+            {
+                WINDOW_IMP_POST(window_imp->mCallbacks->handleActivate(window_imp, TRUE));
+            }
 
-		case WM_SIZE:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_SIZE");
-				S32 width = S32( LOWORD(l_param) );
-				S32 height = S32( HIWORD(l_param) );
+            // handle case of window being maximized from fully minimized state
+            if (w_param == SIZE_MAXIMIZED && window_imp->mLastSizeWParam != SIZE_MAXIMIZED)
+            {
+                WINDOW_IMP_POST(window_imp->mCallbacks->handleActivate(window_imp, TRUE));
+            }
 
-				if (debug_window_proc)
-				{
-					BOOL maximized = ( w_param == SIZE_MAXIMIZED );
-					BOOL restored  = ( w_param == SIZE_RESTORED );
-					BOOL minimized = ( w_param == SIZE_MINIMIZED );
-
-					LL_INFOS("Window") << "WINDOWPROC Size "
-						<< width << "x" << height
-						<< " max " << S32(maximized)
-						<< " min " << S32(minimized)
-						<< " rest " << S32(restored)
-						<< LL_ENDL;
-				}
+            // Also handle the minimization case
+            if (w_param == SIZE_MINIMIZED && window_imp->mLastSizeWParam != SIZE_MINIMIZED)
+            {
+                WINDOW_IMP_POST(window_imp->mCallbacks->handleActivate(window_imp, FALSE));
+            }
 
-				// There's an odd behavior with WM_SIZE that I would call a bug. If 
-				// the window is maximized, and you call MoveWindow() with a size smaller
-				// than a maximized window, it ends up sending WM_SIZE with w_param set 
-				// to SIZE_MAXIMIZED -- which isn't true. So the logic below doesn't work.
-				// (SL-44655). Fixed it by calling ShowWindow(SW_RESTORE) first (see 
-				// LLWindowWin32::moveWindow in this file). 
+            // Actually resize all of our views
+            if (w_param != SIZE_MINIMIZED)
+            {
+                // Ignore updates for minimizing and minimized "windows"
+                WINDOW_IMP_POST(window_imp->mCallbacks->handleResize(window_imp,
+                    LOWORD(l_param),
+                    HIWORD(l_param)));
+            }
 
-				// If we are now restored, but we weren't before, this
-				// means that the window was un-minimized.
-				if (w_param == SIZE_RESTORED && window_imp->mLastSizeWParam != SIZE_RESTORED)
-				{
-					window_imp->mCallbacks->handleActivate(window_imp, TRUE);
-				}
+            window_imp->mLastSizeWParam = w_param;
 
-				// handle case of window being maximized from fully minimized state
-				if (w_param == SIZE_MAXIMIZED && window_imp->mLastSizeWParam != SIZE_MAXIMIZED)
-				{
-					window_imp->mCallbacks->handleActivate(window_imp, TRUE);
-				}
+            return 0;
+        }
 
-				// Also handle the minimization case
-				if (w_param == SIZE_MINIMIZED && window_imp->mLastSizeWParam != SIZE_MINIMIZED)
-				{
-					window_imp->mCallbacks->handleActivate(window_imp, FALSE);
-				}
+        case WM_DPICHANGED:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_DPICHANGED");
+            LPRECT lprc_new_scale;
+            F32 new_scale = F32(LOWORD(w_param)) / F32(USER_DEFAULT_SCREEN_DPI);
+            lprc_new_scale = (LPRECT)l_param;
+            S32 new_width = lprc_new_scale->right - lprc_new_scale->left;
+            S32 new_height = lprc_new_scale->bottom - lprc_new_scale->top;
+            WINDOW_IMP_POST(window_imp->mCallbacks->handleDPIChanged(window_imp, new_scale, new_width, new_height));
+            
+            SetWindowPos(h_wnd,
+                HWND_TOP,
+                lprc_new_scale->left,
+                lprc_new_scale->top,
+                new_width,
+                new_height,
+                SWP_NOZORDER | SWP_NOACTIVATE);
+           
+            return 0;
+        }
 
-				// Actually resize all of our views
-				if (w_param != SIZE_MINIMIZED)
-				{
-					// Ignore updates for minimizing and minimized "windows"
-					window_imp->mCallbacks->handleResize(	window_imp, 
-						LOWORD(l_param), 
-						HIWORD(l_param) );
-				}
+        case WM_SETFOCUS:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_SETFOCUS");
+            if (debug_window_proc)
+            {
+                LL_INFOS("Window") << "WINDOWPROC SetFocus" << LL_ENDL;
+            }
+            WINDOW_IMP_POST(window_imp->mCallbacks->handleFocus(window_imp));
+            return 0;
+        }
 
-				window_imp->mLastSizeWParam = w_param;
+        case WM_KILLFOCUS:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_KILLFOCUS");
+            if (debug_window_proc)
+            {
+                LL_INFOS("Window") << "WINDOWPROC KillFocus" << LL_ENDL;
+            }
+            WINDOW_IMP_POST(window_imp->mCallbacks->handleFocusLost(window_imp));
+            return 0;
+        }
 
-				return 0;
-			}
+        case WM_COPYDATA:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_COPYDATA");
+            // Received a URL. Copy the payload now: the posted callback may run
+            // after this handler returns, when the sender's buffer may be gone.
+            PCOPYDATASTRUCT myCDS = (PCOPYDATASTRUCT)l_param;
+            U8* data = new U8[myCDS->cbData];
+            if (myCDS->lpData && myCDS->cbData > 0)
+            {
+                memcpy(data, myCDS->lpData, myCDS->cbData);
+            }
+            auto myType = myCDS->dwData;
+            window_imp->post([=]()
+                {
+                    // Free through the typed pointer; delete[] on a void* is undefined.
+                    window_imp->mCallbacks->handleDataCopy(window_imp, myType, data);
+                    delete[] data;
+                });
+            return 0;
+        }
+        case WM_SETTINGCHANGE:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - WM_SETTINGCHANGE");
+            if (w_param == SPI_SETMOUSEVANISH)
+            {
+                if (!SystemParametersInfo(SPI_GETMOUSEVANISH, 0, &window_imp->mMouseVanish, 0))
+                {
+                    WINDOW_IMP_POST(window_imp->mMouseVanish = TRUE);
+                }
+            }
+        }
+        break;
         
-		case WM_DPICHANGED:
-			{
-				LPRECT lprc_new_scale;
-				F32 new_scale = F32(LOWORD(w_param)) / F32(USER_DEFAULT_SCREEN_DPI);
-				lprc_new_scale = (LPRECT)l_param;
-				S32 new_width = lprc_new_scale->right - lprc_new_scale->left;
-				S32 new_height = lprc_new_scale->bottom - lprc_new_scale->top;
-				if (window_imp->mCallbacks->handleDPIChanged(window_imp, new_scale, new_width, new_height))
-				{
-					SetWindowPos(h_wnd,
-						HWND_TOP,
-						lprc_new_scale->left,
-						lprc_new_scale->top,
-						new_width,
-						new_height,
-						SWP_NOZORDER | SWP_NOACTIVATE);
-				}
-				return 0;
-			}
-
-		case WM_SETFOCUS:
-			if (debug_window_proc)
-			{
-				LL_INFOS("Window") << "WINDOWPROC SetFocus" << LL_ENDL;
-			}
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_SETFOCUS");
-			window_imp->mCallbacks->handleFocus(window_imp);
-			return 0;
-
-		case WM_KILLFOCUS:
-			if (debug_window_proc)
-			{
-				LL_INFOS("Window") << "WINDOWPROC KillFocus" << LL_ENDL;
-			}
-			window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_KILLFOCUS");
-			window_imp->mCallbacks->handleFocusLost(window_imp);
-			return 0;
-
-		case WM_COPYDATA:
-			{
-				window_imp->mCallbacks->handlePingWatchdog(window_imp, "Main:WM_COPYDATA");
-				// received a URL
-				PCOPYDATASTRUCT myCDS = (PCOPYDATASTRUCT) l_param;
-				window_imp->mCallbacks->handleDataCopy(window_imp, myCDS->dwData, myCDS->lpData);
-			};
-			return 0;			
-
-			break;
-
-		case WM_SETTINGCHANGE:
-			{
-				if (w_param == SPI_SETMOUSEVANISH)
-				{
-					if (!SystemParametersInfo(SPI_GETMOUSEVANISH, 0, &window_imp->mMouseVanish, 0))
-					{
-						window_imp->mMouseVanish = TRUE;
-					}
-				}
-			}
-			break;
-		default:
-			{
-				if (debug_window_proc)
-				{
-					LL_INFOS("Window") << "Unhandled windows message code: " << U32(u_msg) << LL_ENDL;
-				}
-			}
-			break;
-		}
-
-	window_imp->mCallbacks->handlePauseWatchdog(window_imp);	
-	}
+        //list of messages we get often that we don't care to log about
+        case WM_NCHITTEST:
+        case WM_NCMOUSEMOVE:
+        case WM_NCMOUSELEAVE:
+        case WM_MOVING:
+        case WM_MOVE:
+        case WM_WINDOWPOSCHANGING:
+        case WM_WINDOWPOSCHANGED:
+        break;
+
+        default:
+        {
+            LL_PROFILE_ZONE_NAMED("mwp - default");
+            if (debug_window_proc)
+            {
+                LL_INFOS("Window") << "Unhandled windows message code: 0x" << std::hex << U32(u_msg) << LL_ENDL;
+            }
+        }
+        break;
+        }
+    }
     else
     {
         // (NULL == window_imp)
         LL_DEBUGS("Window") << "No window implementation to handle message with, message code: " << U32(u_msg) << LL_ENDL;
     }
 
-	// pass unhandled messages down to Windows
-	return DefWindowProc(h_wnd, u_msg, w_param, l_param);
+    // pass unhandled messages down to Windows
+    LRESULT ret;
+    {
+        LL_PROFILE_ZONE_NAMED("mwp - DefWindowProc");
+        ret = DefWindowProc(h_wnd, u_msg, w_param, l_param);
+    }
+    return ret;
 }
 
 BOOL LLWindowWin32::convertCoords(LLCoordGL from, LLCoordWindow *to)
@@ -3184,6 +3425,8 @@ BOOL LLWindowWin32::copyTextToClipboard(const LLWString& wstr)
 // Constrains the mouse to the window.
 void LLWindowWin32::setMouseClipping( BOOL b )
 {
+    LL_PROFILE_ZONE_SCOPED;
+    ASSERT_MAIN_THREAD();
 	if( b != mIsMouseClipping )
 	{
 		BOOL success = FALSE;
@@ -3260,6 +3503,7 @@ F32 LLWindowWin32::getGamma()
 
 BOOL LLWindowWin32::restoreGamma()
 {
+    ASSERT_MAIN_THREAD();
 	if (mCustomGammaSet != FALSE)
 	{
         LL_DEBUGS("Window") << "Restoring gamma" << LL_ENDL;
@@ -3271,6 +3515,7 @@ BOOL LLWindowWin32::restoreGamma()
 
 BOOL LLWindowWin32::setGamma(const F32 gamma)
 {
+    ASSERT_MAIN_THREAD();
 	mCurrentGamma = gamma;
 
 	//Get the previous gamma ramp to restore later.
@@ -3309,6 +3554,7 @@ BOOL LLWindowWin32::setGamma(const F32 gamma)
 
 void LLWindowWin32::setFSAASamples(const U32 fsaa_samples)
 {
+    ASSERT_MAIN_THREAD();
 	mFSAASamples = fsaa_samples;
 }
 
@@ -3319,6 +3565,7 @@ U32 LLWindowWin32::getFSAASamples()
 
 LLWindow::LLWindowResolution* LLWindowWin32::getSupportedResolutions(S32 &num_resolutions)
 {
+    ASSERT_MAIN_THREAD();
 	if (!mSupportedResolutions)
 	{
 		mSupportedResolutions = new LLWindowResolution[MAX_NUM_RESOLUTIONS];
@@ -3473,7 +3720,10 @@ BOOL LLWindowWin32::resetDisplayResolution()
 
 void LLWindowWin32::swapBuffers()
 {
+    ASSERT_MAIN_THREAD();
 	SwapBuffers(mhDC);
+
+    LL_PROFILER_GPU_COLLECT
 }
 
 
@@ -3946,6 +4196,7 @@ void LLWindowWin32::updateLanguageTextInputArea()
 
 void LLWindowWin32::interruptLanguageTextInput()
 {
+    ASSERT_MAIN_THREAD();
 	if (mPreeditor && LLWinImm::isAvailable())
 	{
 		HIMC himc = LLWinImm::getContext(mWindowHandle);
@@ -4148,6 +4399,7 @@ static LLWString find_context(const LLWString & wtext, S32 focus, S32 focus_leng
 // for files and via IDropTarget interface requests.
 LLWindowCallbacks::DragNDropResult LLWindowWin32::completeDragNDropRequest( const LLCoordGL gl_coord, const MASK mask, LLWindowCallbacks::DragNDropAction action, const std::string url )
 {
+    ASSERT_MAIN_THREAD();
 	return mCallbacks->handleDragNDrop( this, gl_coord, mask, action, url );
 }
 
@@ -4196,6 +4448,7 @@ BOOL LLWindowWin32::handleImeRequests(WPARAM request, LPARAM param, LRESULT *res
 					LL_WARNS("Window") << "*** IMR_QUERYCHARPOSITON called but getPreeditLocation failed." << LL_ENDL;
 					return FALSE;
 				}
+
 				fillCharPosition(caret_coord, preedit_bounds, text_control, char_position);
 
 				*result = 1;
@@ -4403,3 +4656,79 @@ std::vector<std::string> LLWindowWin32::getDynamicFallbackFontList()
 
 
 #endif // LL_WINDOWS
+
+inline LLWindowWin32Thread::LLWindowWin32Thread(LLWindowWin32* window)
+    : LLThread("Window Thread"), 
+    mWindow(window),
+    mFunctionQueue(MAX_QUEUE_SIZE)
+{
+    // Members are set in the initializer list; MAX_QUEUE_SIZE is presumably the queue capacity (confirm in llthreadsafequeue.h).
+}
+
+inline void LLWindowWin32Thread::run()
+{
+    sWindowThreadId = getID();
+    while (!mFinished)
+    {
+        LL_PROFILE_ZONE_SCOPED;
+        // Each pass pumps at most one Win32 message, then drains the posted-function queue.
+        // NOTE(review): GetMessage blocks, so posted functions wait for the next message; the PeekMessage path spins (sleep is #if 0) - confirm both are intended.
+        if (mWindow && mWindow->mWindowHandle != 0)
+        {
+            MSG msg;
+            BOOL status;
+            if (mWindow->mhDC == 0) // no device context handle: poll without blocking
+            {
+                LL_PROFILE_ZONE_NAMED("w32t - PeekMessage");
+                status = PeekMessage(&msg, mWindow->mWindowHandle, 0, 0, PM_REMOVE);
+            }
+            else // otherwise block until a message for this window arrives
+            {
+                LL_PROFILE_ZONE_NAMED("w32t - GetMessage");
+                status = GetMessage(&msg, mWindow->mWindowHandle, 0, 0);
+            }
+            if (status > 0) // GetMessage: 0 is WM_QUIT, -1 is an error; PeekMessage: 0 means no message was available
+            {
+                TranslateMessage(&msg);
+                DispatchMessage(&msg);
+
+                mMessageQueue.pushFront(msg); // keep a copy of the dispatched message for whoever drains mMessageQueue
+            }
+        }
+
+        {
+            LL_PROFILE_ZONE_NAMED("w32t - Function Queue");
+            //process any pending functions
+            std::function<void()> curFunc;
+            while (mFunctionQueue.tryPopBack(curFunc))
+            {
+                curFunc();
+            }
+        }
+        
+#if 0
+        {
+            LL_PROFILE_ZONE_NAMED("w32t - Sleep");
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+        }
+#endif
+    }
+}
+// Hand func to the window thread: pushed onto mFunctionQueue (drained by run()), or called immediately when LL_WINDOW_SINGLE_THREADED is set.
+void LLWindowWin32Thread::post(const std::function<void()>& func)
+{
+#if LL_WINDOW_SINGLE_THREADED
+    func();
+#else
+    mFunctionQueue.pushFront(func);
+#endif
+}
+// Push func onto this window's mFunctionQueue, or call it immediately when LL_WINDOW_SINGLE_THREADED is set.
+void LLWindowWin32::post(const std::function<void()>& func)
+{
+#if LL_WINDOW_SINGLE_THREADED
+    func();
+#else
+    mFunctionQueue.pushFront(func);
+#endif
+}
+\ No newline at end of file
diff --git a/indra/llwindow/llwindowwin32.h b/indra/llwindow/llwindowwin32.h
index 0b3d14fb16..66647459b2 100644
--- a/indra/llwindow/llwindowwin32.h
+++ b/indra/llwindow/llwindowwin32.h
@@ -33,11 +33,46 @@
 #include "llwindow.h"
 #include "llwindowcallbacks.h"
 #include "lldragdropwin32.h"
+#include "llthread.h"
+#include "llthreadsafequeue.h"
 
 // Hack for async host by name
 #define LL_WM_HOST_RESOLVED      (WM_APP + 1)
 typedef void (*LLW32MsgCallback)(const MSG &msg);
 
+class LLWindowWin32;
+
+// Thread that owns the Window Handle; run() pumps that window's Win32 messages.
+class LLWindowWin32Thread : public LLThread
+{
+public:
+    class Message // NOTE(review): not referenced anywhere in the visible change - confirm it is needed
+    {
+    public:
+        LRESULT mMsg;
+    };
+
+    static const int MAX_QUEUE_SIZE = 2048; // passed to mFunctionQueue's constructor
+
+    LLThreadSafeQueue<MSG> mMessageQueue; // run() pushes each message here after dispatching it
+    LLThreadSafeQueue<std::function<void()>> mFunctionQueue; // filled by post(), drained by run()
+
+    bool mFinished = false; // run() loops until this is true; NOTE(review): plain bool - if set from another thread, consider std::atomic<bool>
+
+    LLWindowWin32Thread(LLWindowWin32* window);
+
+    void run() override;
+
+    void post(const std::function<void()>& func);
+
+private:
+
+    // call PeekMessage and enqueue messages for later processing
+    void gatherInput(); // NOTE(review): no definition is visible in this change - confirm it exists
+    LLWindowWin32* mWindow = nullptr;
+
+};
+
 class LLWindowWin32 : public LLWindow
 {
 public:
@@ -172,9 +207,9 @@ protected:
 	WCHAR		*mWindowTitle;
 	WCHAR		*mWindowClassName;
 
-	HWND		mWindowHandle;	// window handle
-	HGLRC		mhRC;			// OpenGL rendering context
-	HDC			mhDC;			// Windows Device context handle
+	HWND	    mWindowHandle = 0;	// window handle
+	HGLRC		mhRC = 0;			// OpenGL rendering context
+	HDC			mhDC = 0;			// Windows Device context handle
 	HINSTANCE	mhInstance;		// handle to application instance
 	WNDPROC		mWndProc;		// user-installable window proc
 	RECT		mOldMouseClip;  // Screen rect to which the mouse cursor was globally constrained before we changed it in clipMouse()
@@ -221,7 +256,12 @@ protected:
 
 	BOOL			mMouseVanish;
 
+    LLWindowWin32Thread* mWindowThread = nullptr;
+    LLThreadSafeQueue<std::function<void()>> mFunctionQueue;
+    void post(const std::function<void()>& func);
+
 	friend class LLWindowManager;
+    friend class LLWindowWin32Thread;
 };
 
 class LLSplashScreenWin32 : public LLSplashScreen
diff --git a/indra/llxml/llcontrol.h b/indra/llxml/llcontrol.h
index 19508becc3..5da13f5010 100644
--- a/indra/llxml/llcontrol.h
+++ b/indra/llxml/llcontrol.h
@@ -405,8 +405,8 @@ public:
 					const T& default_value, 
 					const std::string& comment = "Declared In Code")
 	{
-		mCachedControlPtr = LLControlCache<T>::getInstance(name);
-		if (mCachedControlPtr.isNull())
+		mCachedControlPtr = LLControlCache<T>::getInstance(name).get();
+		if (! mCachedControlPtr)
 		{
 			mCachedControlPtr = new LLControlCache<T>(group, name, default_value, comment);
 		}
@@ -415,8 +415,8 @@ public:
 	LLCachedControl(LLControlGroup& group,
 					const std::string& name)
 	{
-		mCachedControlPtr = LLControlCache<T>::getInstance(name);
-		if (mCachedControlPtr.isNull())
+		mCachedControlPtr = LLControlCache<T>::getInstance(name).get();
+		if (! mCachedControlPtr)
 		{
 			mCachedControlPtr = new LLControlCache<T>(group, name);
 		}
diff --git a/indra/newview/CMakeLists.txt b/indra/newview/CMakeLists.txt
index 68b5969ff1..0144cff4b2 100644
--- a/indra/newview/CMakeLists.txt
+++ b/indra/newview/CMakeLists.txt
@@ -47,6 +47,7 @@ include(OpenGL)
 include(OpenSSL)
 include(PNG)
 include(TemplateCheck)
+include(Tracy)
 include(UI)
 include(UnixInstall)
 include(ViewerMiscLibs)
@@ -92,6 +93,7 @@ include_directories(
     ${LIBS_PREBUILT_DIR}/include/collada/1.4
     ${LLAPPEARANCE_INCLUDE_DIRS}
     ${CMAKE_CURRENT_SOURCE_DIR}
+    ${TRACY_INCLUDE_DIR}
     )
 
 include_directories(SYSTEM
@@ -391,7 +393,6 @@ set(viewer_SOURCE_FILES
     llloginhandler.cpp
     lllogininstance.cpp
     llmachineid.cpp
-    llmainlooprepeater.cpp
     llmanip.cpp
     llmaniprotate.cpp
     llmanipscale.cpp
@@ -587,7 +588,6 @@ set(viewer_SOURCE_FILES
     llsyntaxid.cpp
     llsyswellitem.cpp
     llsyswellwindow.cpp
-    lltelemetry.cpp
     llteleporthistory.cpp
     llteleporthistorystorage.cpp
     lltextureatlas.cpp
@@ -1031,7 +1031,6 @@ set(viewer_HEADER_FILES
     llloginhandler.h
     lllogininstance.h
     llmachineid.h
-    llmainlooprepeater.h
     llmanip.h
     llmaniprotate.h
     llmanipscale.h
@@ -2065,6 +2064,7 @@ target_link_libraries(${VIEWER_BINARY_NAME}
     ${LLPHYSICS_LIBRARIES}
     ${LLPHYSICSEXTENSIONS_LIBRARIES}
     ${LLAPPEARANCE_LIBRARIES}
+    ${TRACY_LIBRARY}
     )
 
 if (USE_BUGSPLAT)
diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml
index b1120c18b2..3c7fe174fd 100644
--- a/indra/newview/app_settings/settings.xml
+++ b/indra/newview/app_settings/settings.xml
@@ -812,17 +812,6 @@
       <key>Value</key>
       <integer>0</integer>
     </map>
-    <key>FramePerSecondLimit</key>
-    <map>
-      <key>Comment</key>
-      <string>Controls upper limit of frames per second</string>
-      <key>Persist</key>
-      <integer>1</integer>
-      <key>Type</key>
-      <string>U32</string>
-      <key>Value</key>
-      <integer>120</integer>
-    </map>
     <key>BackgroundYieldTime</key>
     <map>
       <key>Comment</key>
@@ -3366,13 +3355,13 @@
     <key>DisableVerticalSync</key>
     <map>
       <key>Comment</key>
-      <string>Update frames as fast as possible (FALSE = update frames between display scans)</string>
+      <string>Update frames as fast as possible (FALSE = update frames between display scans).  Requires restart.</string>
       <key>Persist</key>
       <integer>1</integer>
       <key>Type</key>
       <string>Boolean</string>
       <key>Value</key>
-      <integer>1</integer>
+      <integer>0</integer>
     </map>
     <key>EnableGroupChatPopups</key>
     <map>
@@ -3869,6 +3858,17 @@
         <key>Value</key>
         <integer>1</integer>
     </map>
+    <key>MainWorkTime</key>
+    <map>
+        <key>Comment</key>
+        <string>Max time per frame devoted to mainloop work queue (in milliseconds)</string>
+        <key>Persist</key>
+        <integer>1</integer>
+        <key>Type</key>
+        <string>F32</string>
+        <key>Value</key>
+        <real>0.1</real>
+    </map>
     <key>QueueInventoryFetchTimeout</key>
     <map>
         <key>Comment</key>
@@ -12663,6 +12663,20 @@
       <key>Value</key>
       <integer>50</integer>
     </map>
+    <key>ThreadPoolSizes</key>
+    <map>
+      <key>Comment</key>
+      <string>Map of size overrides for specific thread pools.</string>
+      <key>Persist</key>
+      <integer>1</integer>
+      <key>Type</key>
+      <string>LLSD</string>
+      <key>Value</key>
+      <map>
+        <key>General</key>
+        <integer>4</integer>
+      </map>
+    </map>
     <key>ThrottleBandwidthKBPS</key>
     <map>
       <key>Comment</key>
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index 69606793db..a016e658ef 100644
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -91,7 +91,6 @@
 #include "llsdutil_math.h"
 #include "lllocationhistory.h"
 #include "llfasttimerview.h"
-#include "lltelemetry.h"
 #include "llvector4a.h"
 #include "llviewermenufile.h"
 #include "llvoicechannel.h"
@@ -234,11 +233,12 @@
 #include "llavatariconctrl.h"
 #include "llgroupiconctrl.h"
 #include "llviewerassetstats.h"
+#include "workqueue.h"
+using namespace LL;
 
 // Include for security api initialization
 #include "llsecapi.h"
 #include "llmachineid.h"
-#include "llmainlooprepeater.h"
 #include "llcleanup.h"
 
 #include "llcoproceduremanager.h"
@@ -367,6 +367,8 @@ BOOL gLogoutInProgress = FALSE;
 
 BOOL gSimulateMemLeak = FALSE;
 
+WorkQueue gMainloopWork("mainloop");
+
 ////////////////////////////////////////////////////////////
 // Internal globals... that should be removed.
 static std::string gArgs;
@@ -382,42 +384,6 @@ static std::string gLaunchFileOnQuit;
 // Used on Win32 for other apps to identify our window (eg, win_setup)
 const char* const VIEWER_WINDOW_CLASSNAME = "Second Life";
 
-//-- LLDeferredTaskList ------------------------------------------------------
-
-/**
- * A list of deferred tasks.
- *
- * We sometimes need to defer execution of some code until the viewer gets idle,
- * e.g. removing an inventory item from within notifyObservers() may not work out.
- *
- * Tasks added to this list will be executed in the next LLAppViewer::idle() iteration.
- * All tasks are executed only once.
- */
-class LLDeferredTaskList: public LLSingleton<LLDeferredTaskList>
-{
-	LLSINGLETON_EMPTY_CTOR(LLDeferredTaskList);
-	LOG_CLASS(LLDeferredTaskList);
-
-	friend class LLAppViewer;
-	typedef boost::signals2::signal<void()> signal_t;
-
-	void addTask(const signal_t::slot_type& cb)
-	{
-		mSignal.connect(cb);
-	}
-
-	void run()
-	{
-		if (!mSignal.empty())
-		{
-			mSignal();
-			mSignal.disconnect_all_slots();
-		}
-	}
-
-	signal_t mSignal;
-};
-
 //----------------------------------------------------------------------------
 
 // List of entries from strings.xml to always replace
@@ -695,8 +661,7 @@ LLAppViewer::LLAppViewer()
 	mPeriodicSlowFrame(LLCachedControl<bool>(gSavedSettings,"Periodic Slow Frame", FALSE)),
 	mFastTimerLogThread(NULL),
 	mSettingsLocationList(NULL),
-	mIsFirstRun(false),
-	mMinMicroSecPerFrame(0.f)
+	mIsFirstRun(false)
 {
 	if(NULL != sInstance)
 	{
@@ -978,9 +943,6 @@ bool LLAppViewer::init()
 	}
 	LL_INFOS("InitInfo") << "Cache initialization is done." << LL_ENDL ;
 
-	// Initialize the repeater service.
-	LLMainLoopRepeater::instance().start();
-
 	//
 	// Initialize the window
 	//
@@ -1012,19 +974,6 @@ bool LLAppViewer::init()
 		return 0;
 	}
 
-    // If we don't have the right shader requirements.
-    if (!gGLManager.mHasShaderObjects
-        || !gGLManager.mHasVertexShader
-        || !gGLManager.mHasFragmentShader)
-    {
-        LLUIString details = LLNotifications::instance().getGlobalString("UnsupportedShaderRequirements");
-        OSMessageBox(
-            details.getString(),
-            LLStringUtil::null,
-            OSMB_OK);
-        return 0;
-    }
-
 	// Without SSE2 support we will crash almost immediately, warn here.
 	if (!gSysCPU.hasSSE2())
 	{
@@ -1327,10 +1276,6 @@ bool LLAppViewer::init()
 	joystick = LLViewerJoystick::getInstance();
 	joystick->setNeedsReset(true);
 	/*----------------------------------------------------------------------*/
-
-	gSavedSettings.getControl("FramePerSecondLimit")->getSignal()->connect(boost::bind(&LLAppViewer::onChangeFrameLimit, this, _2));
-	onChangeFrameLimit(gSavedSettings.getLLSD("FramePerSecondLimit"));
-
 	// Load User's bindings
 	loadKeyBindings();
 
@@ -1359,7 +1304,8 @@ void LLAppViewer::initMaxHeapSize()
 }
 
 static LLTrace::BlockTimerStatHandle FTM_MESSAGES("System Messages");
-static LLTrace::BlockTimerStatHandle FTM_SLEEP("Sleep");
+static LLTrace::BlockTimerStatHandle FTM_SLEEP1("Sleep1");
+static LLTrace::BlockTimerStatHandle FTM_SLEEP2("Sleep2");
 static LLTrace::BlockTimerStatHandle FTM_YIELD("Yield");
 
 static LLTrace::BlockTimerStatHandle FTM_TEXTURE_CACHE("Texture Cache");
@@ -1421,13 +1367,17 @@ bool LLAppViewer::frame()
 
 bool LLAppViewer::doFrame()
 {
+	LL_RECORD_BLOCK_TIME(FTM_FRAME);
+
 	LLEventPump& mainloop(LLEventPumps::instance().obtain("mainloop"));
 	LLSD newFrame;
 
-	LL_RECORD_BLOCK_TIME(FTM_FRAME);
-	LLTrace::BlockTimer::processTimes();
-	LLTrace::get_frame_recording().nextPeriod();
-	LLTrace::BlockTimer::logStats();
+	{
+		LL_PROFILE_ZONE_NAMED( "df blocktimer" )
+		LLTrace::BlockTimer::processTimes();
+		LLTrace::get_frame_recording().nextPeriod();
+		LLTrace::BlockTimer::logStats();
+	}
 
 	LLTrace::get_thread_recorder()->pullFromChildren();
 
@@ -1435,6 +1385,7 @@ bool LLAppViewer::doFrame()
 	LL_CLEAR_CALLSTACKS();
 
 	{
+		LL_PROFILE_ZONE_NAMED( "df processMiscNativeEvents" )
 		pingMainloopTimeout("Main:MiscNativeWindowEvents");
 
 		if (gViewerWindow)
@@ -1443,7 +1394,10 @@ bool LLAppViewer::doFrame()
 			gViewerWindow->getWindow()->processMiscNativeEvents();
 		}
 
-		pingMainloopTimeout("Main:GatherInput");
+		{
+			LL_PROFILE_ZONE_NAMED( "df gatherInput" )
+			pingMainloopTimeout("Main:GatherInput");
+		}
 
 		if (gViewerWindow)
 		{
@@ -1467,13 +1421,21 @@ bool LLAppViewer::doFrame()
 			}
 		}
 
-		// canonical per-frame event
-		mainloop.post(newFrame);
-		// give listeners a chance to run
-		llcoro::suspend();
+		{
+			LL_PROFILE_ZONE_NAMED( "df mainloop" )
+			// canonical per-frame event
+			mainloop.post(newFrame);
+		}
+
+		{
+			LL_PROFILE_ZONE_NAMED( "df suspend" )
+			// give listeners a chance to run
+			llcoro::suspend();
+		}
 
 		if (!LLApp::isExiting())
 		{
+			LL_PROFILE_ZONE_NAMED( "df JoystickKeyboard" )
 			pingMainloopTimeout("Main:JoystickKeyboard");
 
 			// Scan keyboard for movement keys.  Command keys and typing
@@ -1494,12 +1456,18 @@ bool LLAppViewer::doFrame()
 
 			// Update state based on messages, user input, object idle.
 			{
-				pauseMainloopTimeout(); // *TODO: Remove. Messages shouldn't be stalling for 20+ seconds!
+				{
+					LL_PROFILE_ZONE_NAMED( "df pauseMainloopTimeout" )
+					pauseMainloopTimeout(); // *TODO: Remove. Messages shouldn't be stalling for 20+ seconds!
+				}
 
 				LL_RECORD_BLOCK_TIME(FTM_IDLE);
 				idle();
 
-				resumeMainloopTimeout();
+				{
+					LL_PROFILE_ZONE_NAMED( "df resumeMainloopTimeout" )
+					resumeMainloopTimeout();
+				}
 			}
 
 			if (gDoDisconnect && (LLStartUp::getStartupState() == STATE_STARTED))
@@ -1514,46 +1482,40 @@ bool LLAppViewer::doFrame()
 			// *TODO: Should we run display() even during gHeadlessClient?  DK 2011-02-18
 			if (!LLApp::isExiting() && !gHeadlessClient && gViewerWindow)
 			{
+				LL_PROFILE_ZONE_NAMED( "df Display" )
 				pingMainloopTimeout("Main:Display");
 				gGLActive = TRUE;
 
 				display();
 
-				static U64 last_call = 0;
-				if (!gTeleportDisplay)
 				{
-					// Frame/draw throttling, controlled by FramePerSecondLimit
-					U64 elapsed_time = LLTimer::getTotalTime() - last_call;
-					if (elapsed_time < mMinMicroSecPerFrame)
-					{
-						LL_RECORD_BLOCK_TIME(FTM_SLEEP);
-						// llclamp for when time function gets funky
-						U64 sleep_time = llclamp(mMinMicroSecPerFrame - elapsed_time, (U64)1, (U64)1e6);
-						micro_sleep(sleep_time, 0);
-					}
-				}
-				last_call = LLTimer::getTotalTime();
-
-				pingMainloopTimeout("Main:Snapshot");
-				LLFloaterSnapshot::update(); // take snapshots
+					LL_PROFILE_ZONE_NAMED( "df Snapshot" )
+					pingMainloopTimeout("Main:Snapshot");
+					LLFloaterSnapshot::update(); // take snapshots
 					LLFloaterOutfitSnapshot::update();
-				gGLActive = FALSE;
+					gGLActive = FALSE;
+				}
 			}
 		}
 
-		pingMainloopTimeout("Main:Sleep");
+		{
+			LL_PROFILE_ZONE_NAMED( "df pauseMainloopTimeout" )
+			pingMainloopTimeout("Main:Sleep");
 
-		pauseMainloopTimeout();
+			pauseMainloopTimeout();
+		}
 
 		// Sleep and run background threads
 		{
-			LL_RECORD_BLOCK_TIME(FTM_SLEEP);
+			//LL_RECORD_BLOCK_TIME(FTM_SLEEP2);
+			LL_PROFILE_ZONE_WARN( "Sleep2" )
 
 			// yield some time to the os based on command line option
 			static LLCachedControl<S32> yield_time(gSavedSettings, "YieldTime", -1);
 			if(yield_time >= 0)
 			{
 				LL_RECORD_BLOCK_TIME(FTM_YIELD);
+				LL_PROFILE_ZONE_NUM( yield_time )
 				ms_sleep(yield_time);
 			}
 
@@ -1616,16 +1578,22 @@ bool LLAppViewer::doFrame()
 				total_io_pending += io_pending ;
 
 			}
-			gMeshRepo.update() ;
+
+			{
+				LL_PROFILE_ZONE_NAMED( "df gMeshRepo" )
+				gMeshRepo.update() ;
+			}
 
 			if(!total_work_pending) //pause texture fetching threads if nothing to process.
 			{
+				LL_PROFILE_ZONE_NAMED( "df getTextureCache" )
 				LLAppViewer::getTextureCache()->pause();
 				LLAppViewer::getImageDecodeThread()->pause();
 				LLAppViewer::getTextureFetch()->pause();
 			}
 			if(!total_io_pending) //pause file threads if nothing to process.
 			{
+				LL_PROFILE_ZONE_NAMED( "df LLVFSThread" )
 				LLVFSThread::sLocal->pause();
 				LLLFSThread::sLocal->pause();
 			}
@@ -1633,6 +1601,7 @@ bool LLAppViewer::doFrame()
 			//texture fetching debugger
 			if(LLTextureFetchDebugger::isEnabled())
 			{
+				LL_PROFILE_ZONE_NAMED( "df tex_fetch_debugger_instance" )
 				LLFloaterTextureFetchDebugger* tex_fetch_debugger_instance =
 					LLFloaterReg::findTypedInstance<LLFloaterTextureFetchDebugger>("tex_fetch_debugger");
 				if(tex_fetch_debugger_instance)
@@ -1641,8 +1610,10 @@ bool LLAppViewer::doFrame()
 				}
 			}
 
-			resumeMainloopTimeout();
-
+			{
+				LL_PROFILE_ZONE_NAMED( "df resumeMainloopTimeout" )
+				resumeMainloopTimeout();
+			}
 			pingMainloopTimeout("Main:End");
 		}
 	}
@@ -1668,7 +1639,7 @@ bool LLAppViewer::doFrame()
 		LL_INFOS() << "Exiting main_loop" << LL_ENDL;
 	}
 
-    LLPROFILE_UPDATE();
+    LL_PROFILER_FRAME_END
 
 	return ! LLApp::isRunning();
 }
@@ -2162,8 +2133,6 @@ bool LLAppViewer::cleanup()
 	SUBSYSTEM_CLEANUP(LLProxy);
     LLCore::LLHttp::cleanup();
 
-	LLMainLoopRepeater::instance().stop();
-
 	ll_close_fail_log();
 
 	LLError::LLCallStacks::cleanup();
@@ -2338,7 +2307,7 @@ bool LLAppViewer::loadSettingsFromDirectory(const std::string& location_key,
 			LL_INFOS("Settings") << "Attempting to load settings for the group " << file.name()
 			    << " - from location " << location_key << LL_ENDL;
 
-			LLControlGroup* settings_group = LLControlGroup::getInstance(file.name);
+			auto settings_group = LLControlGroup::getInstance(file.name);
 			if(!settings_group)
 			{
 				LL_WARNS("Settings") << "No matching settings group for name " << file.name() << LL_ENDL;
@@ -2483,12 +2452,7 @@ bool LLAppViewer::initConfiguration()
 #ifndef	LL_RELEASE_FOR_DOWNLOAD
 	// provide developer build only overrides for these control variables that are not
 	// persisted to settings.xml
-	LLControlVariable* c = gSavedSettings.getControl("ShowConsoleWindow");
-	if (c)
-	{
-		c->setValue(true, false);
-	}
-	c = gSavedSettings.getControl("AllowMultipleViewers");
+	LLControlVariable* c = gSavedSettings.getControl("AllowMultipleViewers");
 	if (c)
 	{
 		c->setValue(true, false);
@@ -2640,7 +2604,7 @@ bool LLAppViewer::initConfiguration()
 					group_part = name.substr(0, pos);
 					name_part = name.substr(pos+1);
 					LL_INFOS() << "Setting " << group_part << "." << name_part << " to " << value << LL_ENDL;
-					LLControlGroup* g = LLControlGroup::getInstance(group_part);
+					auto g = LLControlGroup::getInstance(group_part);
 					if (g) control = g->getControl(name_part);
 				}
 				else
@@ -4433,7 +4397,7 @@ bool LLAppViewer::initCache()
 
 void LLAppViewer::addOnIdleCallback(const boost::function<void()>& cb)
 {
-	LLDeferredTaskList::instance().addTask(cb);
+	gMainloopWork.post(cb);
 }
 
 void LLAppViewer::loadKeyBindings()
@@ -5207,8 +5171,19 @@ void LLAppViewer::idle()
 		}
 	}
 
-	// Execute deferred tasks.
-	LLDeferredTaskList::instance().run();
+	// Service the mainloop WorkQueue: replies from worker threads, plus tasks
+	// deferred via addOnIdleCallback(). Function statics mean MainWorkTime is
+	// fetched and converted only once, on the first idle() call.
+	static F32 MainWorkTimeRaw = gSavedSettings.getF32("MainWorkTime");
+	static F32Milliseconds MainWorkTimeMs(MainWorkTimeRaw);
+	// MainWorkTime is specified in fractional milliseconds, but std::chrono
+	// uses integer representations. What if we want less than a microsecond?
+	// Use nanoseconds. We're very sure we will never need to specify a
+	// MainWorkTime that would be larger than we could express in
+	// std::chrono::nanoseconds.
+	static std::chrono::nanoseconds MainWorkTimeNanoSec{
+		std::chrono::nanoseconds::rep(MainWorkTimeMs.value() * 1000000)};
+	gMainloopWork.runFor(MainWorkTimeNanoSec);
 
 	// Handle shutdown process, for example,
 	// wait for floaters to close, send quit message,
@@ -5616,19 +5591,6 @@ void LLAppViewer::disconnectViewer()
 	LLUrlEntryParcel::setDisconnected(gDisconnected);
 }
 
-bool LLAppViewer::onChangeFrameLimit(LLSD const & evt)
-{
-	if (evt.asInteger() > 0)
-	{
-		mMinMicroSecPerFrame = (U64)(1000000.0f / F32(evt.asInteger()));
-	}
-	else
-	{
-		mMinMicroSecPerFrame = 0;
-	}
-	return false;
-}
-
 void LLAppViewer::forceErrorLLError()
 {
    	LL_ERRS() << "This is a deliberate llerror" << LL_ENDL;
diff --git a/indra/newview/llappviewer.h b/indra/newview/llappviewer.h
index 95f6efa29a..37119aeef9 100644
--- a/indra/newview/llappviewer.h
+++ b/indra/newview/llappviewer.h
@@ -261,8 +261,6 @@ private:
     void sendLogoutRequest();
     void disconnectViewer();
 
-	bool onChangeFrameLimit(LLSD const & evt);
-
 	// *FIX: the app viewer class should be some sort of singleton, no?
 	// Perhaps its child class is the singleton and this should be an abstract base.
 	static LLAppViewer* sInstance; 
@@ -318,10 +316,7 @@ private:
 	// llcorehttp library init/shutdown helper
 	LLAppCoreHttp mAppCoreHttp;
 
-        bool mIsFirstRun;
-	U64 mMinMicroSecPerFrame; // frame throttling
-
-
+    bool mIsFirstRun;
 };
 
 // consts from viewer.h
diff --git a/indra/newview/llappviewerwin32.cpp b/indra/newview/llappviewerwin32.cpp
index 25d18fa11f..758bd73cb0 100644
--- a/indra/newview/llappviewerwin32.cpp
+++ b/indra/newview/llappviewerwin32.cpp
@@ -323,6 +323,10 @@ int APIENTRY WINMAIN(HINSTANCE hInstance,
                      PWSTR     pCmdLine,
                      int       nCmdShow)
 {
+    // Call Tracy first thing to have it allocate memory
+    // https://github.com/wolfpld/tracy/issues/196
+    LL_PROFILER_FRAME_END
+
 	const S32 MAX_HEAPS = 255;
 	DWORD heap_enable_lfh_error[MAX_HEAPS];
 	S32 num_heaps = 0;
diff --git a/indra/newview/llbrowsernotification.cpp b/indra/newview/llbrowsernotification.cpp
index 0460bff1b4..30ac35fff7 100644
--- a/indra/newview/llbrowsernotification.cpp
+++ b/indra/newview/llbrowsernotification.cpp
@@ -43,14 +43,14 @@ LLBrowserNotification::LLBrowserNotification()
 bool LLBrowserNotification::processNotification(const LLNotificationPtr& notification)
 {
 	LLUUID media_id = notification->getPayload()["media_id"].asUUID();
-	LLMediaCtrl* media_instance = LLMediaCtrl::getInstance(media_id);
+	auto media_instance = LLMediaCtrl::getInstance(media_id);
 	if (media_instance)
 	{
 		media_instance->showNotification(notification);
 	}
 	else if (LLViewerMediaFocus::instance().getControlsMediaID() == media_id)
 	{
-		LLViewerMediaImpl* impl = LLViewerMedia::getInstance()->getMediaImplFromTextureID(media_id);
+		auto impl = LLViewerMedia::getInstance()->getMediaImplFromTextureID(media_id);
 		if (impl)
 		{
 			impl->showNotification(notification);
diff --git a/indra/newview/llcontrolavatar.cpp b/indra/newview/llcontrolavatar.cpp
index fab249f988..606e670805 100644
--- a/indra/newview/llcontrolavatar.cpp
+++ b/indra/newview/llcontrolavatar.cpp
@@ -241,7 +241,7 @@ void LLControlAvatar::matchVolumeTransform()
 			if (skin_info)
 			{
                 LL_DEBUGS("BindShape") << getFullname() << " bind shape " << skin_info->mBindShapeMatrix << LL_ENDL;
-                bind_rot = LLSkinningUtil::getUnscaledQuaternion(skin_info->mBindShapeMatrix);
+                bind_rot = LLSkinningUtil::getUnscaledQuaternion(LLMatrix4(skin_info->mBindShapeMatrix));
 			}
 #endif
 			setRotation(bind_rot*obj_rot);
diff --git a/indra/newview/lldrawable.cpp b/indra/newview/lldrawable.cpp
index 507af56cb0..30c4a21e1c 100644
--- a/indra/newview/lldrawable.cpp
+++ b/indra/newview/lldrawable.cpp
@@ -101,6 +101,8 @@ LLDrawable::LLDrawable(LLViewerObject *vobj, bool new_entry)
 
 void LLDrawable::init(bool new_entry)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	// mXform
 	mParent = NULL;
 	mRenderType = 0;
@@ -232,6 +234,8 @@ void LLDrawable::markDead()
 
 LLVOVolume* LLDrawable::getVOVolume() const
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	LLViewerObject* objectp = mVObjp;
 	if ( !isDead() && objectp && (objectp->getPCode() == LL_PCODE_VOLUME))
 	{
@@ -335,6 +339,7 @@ static LLTrace::BlockTimerStatHandle FTM_ALLOCATE_FACE("Allocate Face");
 
 LLFace*	LLDrawable::addFace(LLFacePool *poolp, LLViewerTexture *texturep)
 {
+	LL_PROFILE_ZONE_SCOPED
 	
 	LLFace *face;
 	{
@@ -363,6 +368,8 @@ LLFace*	LLDrawable::addFace(LLFacePool *poolp, LLViewerTexture *texturep)
 
 LLFace*	LLDrawable::addFace(const LLTextureEntry *te, LLViewerTexture *texturep)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	LLFace *face;
 
 	{
@@ -387,6 +394,8 @@ LLFace*	LLDrawable::addFace(const LLTextureEntry *te, LLViewerTexture *texturep)
 
 LLFace*	LLDrawable::addFace(const LLTextureEntry *te, LLViewerTexture *texturep, LLViewerTexture *normalp)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	LLFace *face;
 	face = new LLFace(this, mVObjp);
 	
@@ -408,6 +417,8 @@ LLFace*	LLDrawable::addFace(const LLTextureEntry *te, LLViewerTexture *texturep,
 
 LLFace*	LLDrawable::addFace(const LLTextureEntry *te, LLViewerTexture *texturep, LLViewerTexture *normalp, LLViewerTexture *specularp)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	LLFace *face;
 	face = new LLFace(this, mVObjp);
 	
@@ -430,6 +441,8 @@ LLFace*	LLDrawable::addFace(const LLTextureEntry *te, LLViewerTexture *texturep,
 
 void LLDrawable::setNumFaces(const S32 newFaces, LLFacePool *poolp, LLViewerTexture *texturep)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (newFaces == (S32)mFaces.size())
 	{
 		return;
@@ -453,6 +466,8 @@ void LLDrawable::setNumFaces(const S32 newFaces, LLFacePool *poolp, LLViewerText
 
 void LLDrawable::setNumFacesFast(const S32 newFaces, LLFacePool *poolp, LLViewerTexture *texturep)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (newFaces <= (S32)mFaces.size() && newFaces >= (S32)mFaces.size()/2)
 	{
 		return;
@@ -476,6 +491,8 @@ void LLDrawable::setNumFacesFast(const S32 newFaces, LLFacePool *poolp, LLViewer
 
 void LLDrawable::mergeFaces(LLDrawable* src)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	U32 face_count = mFaces.size() + src->mFaces.size();
 
 	mFaces.reserve(face_count);
@@ -509,6 +526,8 @@ void LLDrawable::updateMaterial()
 
 void LLDrawable::makeActive()
 {		
+	LL_PROFILE_ZONE_SCOPED
+
 #if !LL_RELEASE_FOR_DOWNLOAD
 	if (mVObjp.notNull())
 	{
@@ -572,6 +591,8 @@ void LLDrawable::makeActive()
 
 void LLDrawable::makeStatic(BOOL warning_enabled)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (isState(ACTIVE) && 
 		!isState(ACTIVE_CHILD) && 
 		!mVObjp->isAttachment() && 
@@ -618,6 +639,8 @@ void LLDrawable::makeStatic(BOOL warning_enabled)
 // Returns "distance" between target destination and resulting xfrom
 F32 LLDrawable::updateXform(BOOL undamped)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	BOOL damped = !undamped;
 
 	// Position
@@ -769,6 +792,8 @@ void LLDrawable::moveUpdatePipeline(BOOL moved)
 
 void LLDrawable::movePartition()
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	LLSpatialPartition* part = getSpatialPartition();
 	if (part)
 	{
@@ -813,6 +838,8 @@ BOOL LLDrawable::updateMoveUndamped()
 
 void LLDrawable::updatePartition()
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (!getVOVolume())
 	{
 		movePartition();
@@ -830,6 +857,8 @@ void LLDrawable::updatePartition()
 
 BOOL LLDrawable::updateMoveDamped()
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	F32 dist_squared = updateXform(FALSE);
 
 	mGeneration++;
@@ -853,6 +882,8 @@ BOOL LLDrawable::updateMoveDamped()
 
 void LLDrawable::updateDistance(LLCamera& camera, bool force_update)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (LLViewerCamera::sCurCameraID != LLViewerCamera::CAMERA_WORLD)
 	{
 		LL_WARNS() << "Attempted to update distance for non-world camera." << LL_ENDL;
@@ -957,6 +988,8 @@ void LLDrawable::updateTexture()
 
 BOOL LLDrawable::updateGeometry(BOOL priority)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	llassert(mVObjp.notNull());
 	BOOL res = mVObjp->updateGeometry(this);
 	return res;
@@ -1034,6 +1067,8 @@ const LLVector3& LLDrawable::getBounds(LLVector3& min, LLVector3& max) const
 
 void LLDrawable::updateSpatialExtents()
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (mVObjp)
 	{
 		const LLVector4a* exts = getSpatialExtents();
@@ -1164,6 +1199,8 @@ void LLDrawable::setGroup(LLViewerOctreeGroup *groupp)
 
 LLSpatialPartition* LLDrawable::getSpatialPartition()
 { 
+	LL_PROFILE_ZONE_SCOPED
+
 	LLSpatialPartition* retval = NULL;
 	
 	if (!mVObjp || 
@@ -1247,6 +1284,8 @@ LLSpatialBridge::LLSpatialBridge(LLDrawable* root, BOOL render_by_group, U32 dat
 	LLDrawable(root->getVObj(), true),
 	LLSpatialPartition(data_mask, render_by_group, GL_STREAM_DRAW_ARB, regionp)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	mBridge = this;
 	mDrawable = root;
 	root->setSpatialBridge(this);
@@ -1292,6 +1331,8 @@ void LLSpatialBridge::destroyTree()
 
 void LLSpatialBridge::updateSpatialExtents()
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	LLSpatialGroup* root = (LLSpatialGroup*) mOctree->getListener(0);
 	
 	{
@@ -1455,6 +1496,8 @@ public:
 
 void LLSpatialBridge::setVisible(LLCamera& camera_in, std::vector<LLDrawable*>* results, BOOL for_select)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (!gPipeline.hasRenderType(mDrawableType))
 	{
 		return;
@@ -1552,6 +1595,8 @@ void LLSpatialBridge::setVisible(LLCamera& camera_in, std::vector<LLDrawable*>*
 
 void LLSpatialBridge::updateDistance(LLCamera& camera_in, bool force_update)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (mDrawable == NULL)
 	{
 		markDead();
diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp
index 687b13d2c8..02ab316256 100644
--- a/indra/newview/lldrawpoolavatar.cpp
+++ b/indra/newview/lldrawpoolavatar.cpp
@@ -121,6 +121,8 @@ LLDrawPoolAvatar::~LLDrawPoolAvatar()
 // virtual
 BOOL LLDrawPoolAvatar::isDead()
 {
+    LL_PROFILE_ZONE_SCOPED
+
     if (!LLFacePool::isDead())
     {
         return FALSE;
@@ -138,11 +140,15 @@ BOOL LLDrawPoolAvatar::isDead()
 
 S32 LLDrawPoolAvatar::getShaderLevel() const
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	return (S32) LLViewerShaderMgr::instance()->getShaderLevel(LLViewerShaderMgr::SHADER_AVATAR);
 }
 
 void LLDrawPoolAvatar::prerender()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	mShaderLevel = LLViewerShaderMgr::instance()->getShaderLevel(LLViewerShaderMgr::SHADER_AVATAR);
 	
 	sShaderLevel = mShaderLevel;
@@ -163,12 +169,15 @@ void LLDrawPoolAvatar::prerender()
 		{
 			LLVOAvatar* avatarp = (LLVOAvatar *)facep->getDrawable()->getVObj().get();
 			updateRiggedVertexBuffers(avatarp);
+            updateSkinInfoMatrixPalettes(avatarp);
 		}
 	}
 }
 
 LLMatrix4& LLDrawPoolAvatar::getModelView()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	static LLMatrix4 ret;
 
 	ret.initRows(LLVector4(gGLModelView+0),
@@ -257,6 +266,8 @@ void LLDrawPoolAvatar::endDeferredPass(S32 pass)
 
 void LLDrawPoolAvatar::renderDeferred(S32 pass)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	render(pass);
 }
 
@@ -267,6 +278,8 @@ S32 LLDrawPoolAvatar::getNumPostDeferredPasses()
 
 void LLDrawPoolAvatar::beginPostDeferredPass(S32 pass)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	switch (pass)
 	{
 	case 0:
@@ -295,6 +308,8 @@ void LLDrawPoolAvatar::beginPostDeferredPass(S32 pass)
 
 void LLDrawPoolAvatar::beginPostDeferredAlpha()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	sSkipOpaque = TRUE;
 	sShaderLevel = mShaderLevel;
 	sVertexProgram = &gDeferredAvatarAlphaProgram;
@@ -309,6 +324,8 @@ void LLDrawPoolAvatar::beginPostDeferredAlpha()
 
 void LLDrawPoolAvatar::beginDeferredRiggedAlpha()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	sVertexProgram = &gDeferredSkinnedAlphaProgram;
 	gPipeline.bindDeferredShader(*sVertexProgram);
 	sDiffuseChannel = sVertexProgram->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
@@ -317,6 +334,8 @@ void LLDrawPoolAvatar::beginDeferredRiggedAlpha()
 
 void LLDrawPoolAvatar::beginDeferredRiggedMaterialAlpha(S32 pass)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	switch (pass)
 	{
 	case 0: pass = 1; break;
@@ -343,6 +362,8 @@ void LLDrawPoolAvatar::beginDeferredRiggedMaterialAlpha(S32 pass)
 
 void LLDrawPoolAvatar::endDeferredRiggedAlpha()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	LLVertexBuffer::unbind();
 	gPipeline.unbindDeferredShader(*sVertexProgram);
 	sDiffuseChannel = 0;
@@ -353,6 +374,8 @@ void LLDrawPoolAvatar::endDeferredRiggedAlpha()
 
 void LLDrawPoolAvatar::endPostDeferredPass(S32 pass)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	switch (pass)
 	{
 	case 0:
@@ -381,6 +404,8 @@ void LLDrawPoolAvatar::endPostDeferredPass(S32 pass)
 
 void LLDrawPoolAvatar::endPostDeferredAlpha()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	// if we're in software-blending, remember to set the fence _after_ we draw so we wait till this rendering is done
 	sRenderingSkinned = FALSE;
 	sSkipOpaque = FALSE;
@@ -392,6 +417,8 @@ void LLDrawPoolAvatar::endPostDeferredAlpha()
 
 void LLDrawPoolAvatar::renderPostDeferred(S32 pass)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	static const S32 actual_pass[] =
 	{ //map post deferred pass numbers to what render() expects
 		2, //skinned
@@ -428,225 +455,235 @@ S32 LLDrawPoolAvatar::getNumShadowPasses()
 void LLDrawPoolAvatar::beginShadowPass(S32 pass)
 {
 	LL_RECORD_BLOCK_TIME(FTM_SHADOW_AVATAR);
+    {
+        LL_PROFILE_ZONE_SCOPED;
 
-	if (pass == SHADOW_PASS_AVATAR_OPAQUE)
-	{
-		sVertexProgram = &gDeferredAvatarShadowProgram;
-		
-		if ((sShaderLevel > 0))  // for hardware blending
-		{
-			sRenderingSkinned = TRUE;
-			sVertexProgram->bind();
-		}
+        if (pass == SHADOW_PASS_AVATAR_OPAQUE)
+        {
+            sVertexProgram = &gDeferredAvatarShadowProgram;
 
-		gGL.diffuseColor4f(1,1,1,1);
-	}
-    else if (pass == SHADOW_PASS_AVATAR_ALPHA_BLEND)
-	{
-		sVertexProgram = &gDeferredAvatarAlphaShadowProgram;
+            if ((sShaderLevel > 0))  // for hardware blending
+            {
+                sRenderingSkinned = TRUE;
+                sVertexProgram->bind();
+            }
 
-        // bind diffuse tex so we can reference the alpha channel...
-        S32 loc = sVertexProgram->getUniformLocation(LLViewerShaderMgr::DIFFUSE_MAP);
-        sDiffuseChannel = 0;
-        if (loc != -1)
+            gGL.diffuseColor4f(1, 1, 1, 1);
+        }
+        else if (pass == SHADOW_PASS_AVATAR_ALPHA_BLEND)
         {
-            sDiffuseChannel = sVertexProgram->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
-		}
+            sVertexProgram = &gDeferredAvatarAlphaShadowProgram;
 
-		if ((sShaderLevel > 0))  // for hardware blending
-		{
-			sRenderingSkinned = TRUE;
-			sVertexProgram->bind();
-		}
+            // bind diffuse tex so we can reference the alpha channel...
+            S32 loc = sVertexProgram->getUniformLocation(LLViewerShaderMgr::DIFFUSE_MAP);
+            sDiffuseChannel = 0;
+            if (loc != -1)
+            {
+                sDiffuseChannel = sVertexProgram->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
+            }
 
-		gGL.diffuseColor4f(1,1,1,1);
-	}
-    else if (pass == SHADOW_PASS_AVATAR_ALPHA_MASK)
-	{
-		sVertexProgram = &gDeferredAvatarAlphaMaskShadowProgram;
+            if ((sShaderLevel > 0))  // for hardware blending
+            {
+                sRenderingSkinned = TRUE;
+                sVertexProgram->bind();
+            }
 
-        // bind diffuse tex so we can reference the alpha channel...
-        S32 loc = sVertexProgram->getUniformLocation(LLViewerShaderMgr::DIFFUSE_MAP);
-        sDiffuseChannel = 0;
-        if (loc != -1)
+            gGL.diffuseColor4f(1, 1, 1, 1);
+        }
+        else if (pass == SHADOW_PASS_AVATAR_ALPHA_MASK)
         {
-            sDiffuseChannel = sVertexProgram->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
-		}
+            sVertexProgram = &gDeferredAvatarAlphaMaskShadowProgram;
 
-		if ((sShaderLevel > 0))  // for hardware blending
-		{
-			sRenderingSkinned = TRUE;
-			sVertexProgram->bind();
-		}
+            // bind diffuse tex so we can reference the alpha channel...
+            S32 loc = sVertexProgram->getUniformLocation(LLViewerShaderMgr::DIFFUSE_MAP);
+            sDiffuseChannel = 0;
+            if (loc != -1)
+            {
+                sDiffuseChannel = sVertexProgram->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
+            }
 
-		gGL.diffuseColor4f(1,1,1,1);
-	}
-    else if (pass == SHADOW_PASS_ATTACHMENT_ALPHA_BLEND)
-	{
-		sVertexProgram = &gDeferredAttachmentAlphaShadowProgram;
+            if ((sShaderLevel > 0))  // for hardware blending
+            {
+                sRenderingSkinned = TRUE;
+                sVertexProgram->bind();
+            }
 
-        // bind diffuse tex so we can reference the alpha channel...
-        S32 loc = sVertexProgram->getUniformLocation(LLViewerShaderMgr::DIFFUSE_MAP);
-        sDiffuseChannel = 0;
-        if (loc != -1)
+            gGL.diffuseColor4f(1, 1, 1, 1);
+        }
+        else if (pass == SHADOW_PASS_ATTACHMENT_ALPHA_BLEND)
         {
-            sDiffuseChannel = sVertexProgram->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
-		}
-		
-		if ((sShaderLevel > 0))  // for hardware blending
-		{
-			sRenderingSkinned = TRUE;
-			sVertexProgram->bind();
-		}
+            sVertexProgram = &gDeferredAttachmentAlphaShadowProgram;
 
-		gGL.diffuseColor4f(1,1,1,1);
-	}
-    else if (pass == SHADOW_PASS_ATTACHMENT_ALPHA_MASK)
-	{
-		sVertexProgram = &gDeferredAttachmentAlphaMaskShadowProgram;
+            // bind diffuse tex so we can reference the alpha channel...
+            S32 loc = sVertexProgram->getUniformLocation(LLViewerShaderMgr::DIFFUSE_MAP);
+            sDiffuseChannel = 0;
+            if (loc != -1)
+            {
+                sDiffuseChannel = sVertexProgram->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
+            }
+
+            if ((sShaderLevel > 0))  // for hardware blending
+            {
+                sRenderingSkinned = TRUE;
+                sVertexProgram->bind();
+            }
 
-        // bind diffuse tex so we can reference the alpha channel...
-		S32 loc = sVertexProgram->getUniformLocation(LLViewerShaderMgr::DIFFUSE_MAP);
-        sDiffuseChannel = 0;
-        if (loc != -1)
+            gGL.diffuseColor4f(1, 1, 1, 1);
+        }
+        else if (pass == SHADOW_PASS_ATTACHMENT_ALPHA_MASK)
         {
-            sDiffuseChannel = sVertexProgram->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
-		}
+            sVertexProgram = &gDeferredAttachmentAlphaMaskShadowProgram;
 
-		if ((sShaderLevel > 0))  // for hardware blending
-		{
-			sRenderingSkinned = TRUE;
-			sVertexProgram->bind();
-		}
+            // bind diffuse tex so we can reference the alpha channel...
+            S32 loc = sVertexProgram->getUniformLocation(LLViewerShaderMgr::DIFFUSE_MAP);
+            sDiffuseChannel = 0;
+            if (loc != -1)
+            {
+                sDiffuseChannel = sVertexProgram->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
+            }
 
-		gGL.diffuseColor4f(1,1,1,1);
-	}
-	else // SHADOW_PASS_ATTACHMENT_OPAQUE
-	{
-		sVertexProgram = &gDeferredAttachmentShadowProgram;
-		S32 loc = sVertexProgram->getUniformLocation(LLViewerShaderMgr::DIFFUSE_MAP);
-        sDiffuseChannel = 0;
-        if (loc != -1)
+            if ((sShaderLevel > 0))  // for hardware blending
+            {
+                sRenderingSkinned = TRUE;
+                sVertexProgram->bind();
+            }
+
+            gGL.diffuseColor4f(1, 1, 1, 1);
+        }
+        else // SHADOW_PASS_ATTACHMENT_OPAQUE
         {
-            sDiffuseChannel = sVertexProgram->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
-		}
-		sVertexProgram->bind();
-	}
+            sVertexProgram = &gDeferredAttachmentShadowProgram;
+            S32 loc = sVertexProgram->getUniformLocation(LLViewerShaderMgr::DIFFUSE_MAP);
+            sDiffuseChannel = 0;
+            if (loc != -1)
+            {
+                sDiffuseChannel = sVertexProgram->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
+            }
+            sVertexProgram->bind();
+        }
+    }
 }
 
 void LLDrawPoolAvatar::endShadowPass(S32 pass)
 {
 	LL_RECORD_BLOCK_TIME(FTM_SHADOW_AVATAR);
+    { // endShadowPass: releases the GL bindings and resets the static state used for shadow pass `pass`
+        LL_PROFILE_ZONE_SCOPED;
 
-	if (pass == SHADOW_PASS_ATTACHMENT_OPAQUE)
-	{
-		LLVertexBuffer::unbind();
-	}
+        if (pass == SHADOW_PASS_ATTACHMENT_OPAQUE) // only this pass unbinds the vertex buffer here
+        {
+            LLVertexBuffer::unbind();
+        }
 
-    if (sShaderLevel > 0)
-	{			
-		sVertexProgram->unbind();
-	}
-    sVertexProgram = NULL;
-    sRenderingSkinned = FALSE;
-    LLDrawPoolAvatar::sShadowPass = -1;
+        if (sShaderLevel > 0) // the program is unbound only when a shader level is active
+        {
+            sVertexProgram->unbind();
+        }
+        sVertexProgram = NULL; // clear the statics so no shadow pass is recorded as active
+        sRenderingSkinned = FALSE;
+        LLDrawPoolAvatar::sShadowPass = -1;
+    }
 }
 
 void LLDrawPoolAvatar::renderShadow(S32 pass)
 {
-	LL_RECORD_BLOCK_TIME(FTM_SHADOW_AVATAR);
+    LL_RECORD_BLOCK_TIME(FTM_SHADOW_AVATAR);
+    {
+        LL_PROFILE_ZONE_SCOPED; // renderShadow: draws this pool's avatar geometry for shadow pass `pass`
 
-	if (mDrawFace.empty())
-	{
-		return;
-	}
+        if (mDrawFace.empty()) // no faces in this pool: nothing to draw
+        {
+            return;
+        }
 
-	const LLFace *facep = mDrawFace[0];
-	if (!facep->getDrawable())
-	{
-		return;
-	}
-	LLVOAvatar *avatarp = (LLVOAvatar *)facep->getDrawable()->getVObj().get();
+        const LLFace *facep = mDrawFace[0]; // the avatar is reached through the pool's first face
+        if (!facep->getDrawable())
+        {
+            return;
+        }
+        LLVOAvatar *avatarp = (LLVOAvatar *)facep->getDrawable()->getVObj().get();
 
-	if (avatarp->isDead() || avatarp->isUIAvatar() || avatarp->mDrawable.isNull())
-	{
-		return;
-	}
-	LLVOAvatar::AvatarOverallAppearance oa = avatarp->getOverallAppearance();
-	BOOL impostor = !LLPipeline::sImpostorRender && avatarp->isImpostor();
-	if (oa == LLVOAvatar::AOA_INVISIBLE ||
-		(impostor && oa == LLVOAvatar::AOA_JELLYDOLL))
-	{
-		// No shadows for jellydolled or invisible avs.
-		return;
-	}
-	
-    LLDrawPoolAvatar::sShadowPass = pass;
+        if (avatarp->isDead() || avatarp->isUIAvatar() || avatarp->mDrawable.isNull())
+        {
+            return;
+        }
+        LLVOAvatar::AvatarOverallAppearance oa = avatarp->getOverallAppearance();
+        BOOL impostor = !LLPipeline::sImpostorRender && avatarp->isImpostor();
+        if (impostor || (oa == LLVOAvatar::AOA_INVISIBLE))
+        {
+            // No shadows for impostored (including jellydolled) or invisible avs.
+            return;
+        }
 
-	if (pass == SHADOW_PASS_AVATAR_OPAQUE)
-	{
-        LLDrawPoolAvatar::sSkipTransparent = true;
-		avatarp->renderSkinned();
-        LLDrawPoolAvatar::sSkipTransparent = false;
-	}
-    else if (pass == SHADOW_PASS_AVATAR_ALPHA_BLEND)
-	{
-        LLDrawPoolAvatar::sSkipOpaque = true;
-		avatarp->renderSkinned();
-        LLDrawPoolAvatar::sSkipOpaque = false;
-	}
-    else if (pass == SHADOW_PASS_AVATAR_ALPHA_MASK)
-	{
-        LLDrawPoolAvatar::sSkipOpaque = true;
-		avatarp->renderSkinned();
-        LLDrawPoolAvatar::sSkipOpaque = false;
-	}
-    else if (pass == SHADOW_PASS_ATTACHMENT_ALPHA_BLEND) // rigged alpha
-	{
-        LLDrawPoolAvatar::sSkipOpaque = true;
-        renderRigged(avatarp, RIGGED_MATERIAL_ALPHA);
-        renderRigged(avatarp, RIGGED_MATERIAL_ALPHA_EMISSIVE);
-        renderRigged(avatarp, RIGGED_ALPHA);
-        renderRigged(avatarp, RIGGED_FULLBRIGHT_ALPHA);
-        renderRigged(avatarp, RIGGED_GLOW);
-        renderRigged(avatarp, RIGGED_SPECMAP_BLEND);
-        renderRigged(avatarp, RIGGED_NORMMAP_BLEND);
-        renderRigged(avatarp, RIGGED_NORMSPEC_BLEND);
-        LLDrawPoolAvatar::sSkipOpaque = false;
-	}
-    else if (pass == SHADOW_PASS_ATTACHMENT_ALPHA_MASK) // rigged alpha mask
-	{
-        LLDrawPoolAvatar::sSkipOpaque = true;
-        renderRigged(avatarp, RIGGED_MATERIAL_ALPHA_MASK);
-        renderRigged(avatarp, RIGGED_NORMMAP_MASK);
-        renderRigged(avatarp, RIGGED_SPECMAP_MASK);
-		renderRigged(avatarp, RIGGED_NORMSPEC_MASK);    
-        renderRigged(avatarp, RIGGED_GLOW);
-        LLDrawPoolAvatar::sSkipOpaque = false;
-	}
-	else // rigged opaque (SHADOW_PASS_ATTACHMENT_OPAQUE
-	{
-        LLDrawPoolAvatar::sSkipTransparent = true;
-		renderRigged(avatarp, RIGGED_MATERIAL);
-        renderRigged(avatarp, RIGGED_SPECMAP);
-		renderRigged(avatarp, RIGGED_SPECMAP_EMISSIVE);
-		renderRigged(avatarp, RIGGED_NORMMAP);		
-		renderRigged(avatarp, RIGGED_NORMMAP_EMISSIVE);
-		renderRigged(avatarp, RIGGED_NORMSPEC);
-		renderRigged(avatarp, RIGGED_NORMSPEC_EMISSIVE);
-		renderRigged(avatarp, RIGGED_SIMPLE);
-		renderRigged(avatarp, RIGGED_FULLBRIGHT);
-		renderRigged(avatarp, RIGGED_SHINY);
-		renderRigged(avatarp, RIGGED_FULLBRIGHT_SHINY);
-		renderRigged(avatarp, RIGGED_GLOW);
-		renderRigged(avatarp, RIGGED_DEFERRED_BUMP);
-		renderRigged(avatarp, RIGGED_DEFERRED_SIMPLE);
-        LLDrawPoolAvatar::sSkipTransparent = false;
-	}
+        LLDrawPoolAvatar::sShadowPass = pass; // stored in the static only after every early-out above has passed
+
+        if (pass == SHADOW_PASS_AVATAR_OPAQUE)
+        {
+            LLDrawPoolAvatar::sSkipTransparent = true; // each skip flag is raised only around its draw calls
+            avatarp->renderSkinned();
+            LLDrawPoolAvatar::sSkipTransparent = false;
+        }
+        else if (pass == SHADOW_PASS_AVATAR_ALPHA_BLEND)
+        {
+            LLDrawPoolAvatar::sSkipOpaque = true;
+            avatarp->renderSkinned();
+            LLDrawPoolAvatar::sSkipOpaque = false;
+        }
+        else if (pass == SHADOW_PASS_AVATAR_ALPHA_MASK)
+        {
+            LLDrawPoolAvatar::sSkipOpaque = true;
+            avatarp->renderSkinned();
+            LLDrawPoolAvatar::sSkipOpaque = false;
+        }
+        else if (pass == SHADOW_PASS_ATTACHMENT_ALPHA_BLEND) // rigged alpha
+        {
+            LLDrawPoolAvatar::sSkipOpaque = true;
+            renderRigged(avatarp, RIGGED_MATERIAL_ALPHA);
+            renderRigged(avatarp, RIGGED_MATERIAL_ALPHA_EMISSIVE);
+            renderRigged(avatarp, RIGGED_ALPHA);
+            renderRigged(avatarp, RIGGED_FULLBRIGHT_ALPHA);
+            renderRigged(avatarp, RIGGED_GLOW);
+            renderRigged(avatarp, RIGGED_SPECMAP_BLEND);
+            renderRigged(avatarp, RIGGED_NORMMAP_BLEND);
+            renderRigged(avatarp, RIGGED_NORMSPEC_BLEND);
+            LLDrawPoolAvatar::sSkipOpaque = false;
+        }
+        else if (pass == SHADOW_PASS_ATTACHMENT_ALPHA_MASK) // rigged alpha mask
+        {
+            LLDrawPoolAvatar::sSkipOpaque = true;
+            renderRigged(avatarp, RIGGED_MATERIAL_ALPHA_MASK);
+            renderRigged(avatarp, RIGGED_NORMMAP_MASK);
+            renderRigged(avatarp, RIGGED_SPECMAP_MASK);
+            renderRigged(avatarp, RIGGED_NORMSPEC_MASK);
+            renderRigged(avatarp, RIGGED_GLOW);
+            LLDrawPoolAvatar::sSkipOpaque = false;
+        }
+        else // rigged opaque (SHADOW_PASS_ATTACHMENT_OPAQUE)
+        {
+            LLDrawPoolAvatar::sSkipTransparent = true;
+            renderRigged(avatarp, RIGGED_MATERIAL);
+            renderRigged(avatarp, RIGGED_SPECMAP);
+            renderRigged(avatarp, RIGGED_SPECMAP_EMISSIVE);
+            renderRigged(avatarp, RIGGED_NORMMAP);
+            renderRigged(avatarp, RIGGED_NORMMAP_EMISSIVE);
+            renderRigged(avatarp, RIGGED_NORMSPEC);
+            renderRigged(avatarp, RIGGED_NORMSPEC_EMISSIVE);
+            renderRigged(avatarp, RIGGED_SIMPLE);
+            renderRigged(avatarp, RIGGED_FULLBRIGHT);
+            renderRigged(avatarp, RIGGED_SHINY);
+            renderRigged(avatarp, RIGGED_FULLBRIGHT_SHINY);
+            renderRigged(avatarp, RIGGED_GLOW);
+            renderRigged(avatarp, RIGGED_DEFERRED_BUMP);
+            renderRigged(avatarp, RIGGED_DEFERRED_SIMPLE);
+            LLDrawPoolAvatar::sSkipTransparent = false;
+        }
+    }
 }
 
 S32 LLDrawPoolAvatar::getNumPasses()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (LLPipeline::sImpostorRender)
 	{
 		return 8;
@@ -660,6 +697,8 @@ S32 LLDrawPoolAvatar::getNumPasses()
 
 S32 LLDrawPoolAvatar::getNumDeferredPasses()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (LLPipeline::sImpostorRender)
 	{
 		return 19;
@@ -780,6 +819,8 @@ void LLDrawPoolAvatar::endRenderPass(S32 pass)
 
 void LLDrawPoolAvatar::beginImpostor()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (!LLPipeline::sReflectionRender)
 	{
 		LLVOAvatar::sRenderDistance = llclamp(LLVOAvatar::sRenderDistance, 16.f, 256.f);
@@ -798,6 +839,8 @@ void LLDrawPoolAvatar::beginImpostor()
 
 void LLDrawPoolAvatar::endImpostor()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (LLGLSLShader::sNoFixedFunction)
 	{
 		gImpostorProgram.unbind();
@@ -807,6 +850,8 @@ void LLDrawPoolAvatar::endImpostor()
 
 void LLDrawPoolAvatar::beginRigid()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (gPipeline.canUseVertexShaders())
 	{
 		if (LLPipeline::sUnderWaterRender)
@@ -840,6 +885,8 @@ void LLDrawPoolAvatar::beginRigid()
 
 void LLDrawPoolAvatar::endRigid()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	sShaderLevel = mShaderLevel;
 	if (sVertexProgram != NULL)
 	{
@@ -849,6 +896,8 @@ void LLDrawPoolAvatar::endRigid()
 
 void LLDrawPoolAvatar::beginDeferredImpostor()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (!LLPipeline::sReflectionRender)
 	{
 		LLVOAvatar::sRenderDistance = llclamp(LLVOAvatar::sRenderDistance, 16.f, 256.f);
@@ -865,6 +914,8 @@ void LLDrawPoolAvatar::beginDeferredImpostor()
 
 void LLDrawPoolAvatar::endDeferredImpostor()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	sShaderLevel = mShaderLevel;
 	sVertexProgram->disableTexture(LLViewerShaderMgr::DEFERRED_NORMAL);
 	sVertexProgram->disableTexture(LLViewerShaderMgr::SPECULAR_MAP);
@@ -876,6 +927,8 @@ void LLDrawPoolAvatar::endDeferredImpostor()
 
 void LLDrawPoolAvatar::beginDeferredRigid()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	sVertexProgram = &gDeferredNonIndexedDiffuseAlphaMaskNoColorProgram;
 	sDiffuseChannel = sVertexProgram->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
 	sVertexProgram->bind();
@@ -892,6 +945,8 @@ void LLDrawPoolAvatar::beginDeferredRigid()
 
 void LLDrawPoolAvatar::endDeferredRigid()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	sShaderLevel = mShaderLevel;
 	sVertexProgram->disableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
 	sVertexProgram->unbind();
@@ -901,6 +956,8 @@ void LLDrawPoolAvatar::endDeferredRigid()
 
 void LLDrawPoolAvatar::beginSkinned()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (sShaderLevel > 0)
 	{
 		if (LLPipeline::sUnderWaterRender)
@@ -967,6 +1024,8 @@ void LLDrawPoolAvatar::beginSkinned()
 
 void LLDrawPoolAvatar::endSkinned()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	// if we're in software-blending, remember to set the fence _after_ we draw so we wait till this rendering is done
 	if (sShaderLevel > 0)
 	{
@@ -991,6 +1050,8 @@ void LLDrawPoolAvatar::endSkinned()
 
 void LLDrawPoolAvatar::beginRiggedSimple()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (sShaderLevel > 0)
 	{
 		if (LLPipeline::sUnderWaterRender)
@@ -1031,6 +1092,8 @@ void LLDrawPoolAvatar::beginRiggedSimple()
 
 void LLDrawPoolAvatar::endRiggedSimple()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	LLVertexBuffer::unbind();
 	if (sShaderLevel > 0 || gPipeline.canUseVertexShaders())
 	{
@@ -1041,27 +1104,37 @@ void LLDrawPoolAvatar::endRiggedSimple()
 
 void LLDrawPoolAvatar::beginRiggedAlpha()
 {
+    LL_PROFILE_ZONE_SCOPED; // explicit ';' so the statement does not depend on how the macro expands
+    // Rigged alpha setup is delegated entirely to beginRiggedSimple().
 	beginRiggedSimple();
 }
 
 void LLDrawPoolAvatar::endRiggedAlpha()
 {
+    LL_PROFILE_ZONE_SCOPED; // explicit ';' so the statement does not depend on how the macro expands
+    // Rigged alpha teardown is delegated entirely to endRiggedSimple().
 	endRiggedSimple();
 }
 
 
 void LLDrawPoolAvatar::beginRiggedFullbrightAlpha()
 {
+    LL_PROFILE_ZONE_SCOPED; // explicit ';' so the statement does not depend on how the macro expands
+    // Rigged fullbright-alpha setup is delegated entirely to beginRiggedFullbright().
 	beginRiggedFullbright();
 }
 
 void LLDrawPoolAvatar::endRiggedFullbrightAlpha()
 {
+    LL_PROFILE_ZONE_SCOPED; // explicit ';' so the statement does not depend on how the macro expands
+    // Rigged fullbright-alpha teardown is delegated entirely to endRiggedFullbright().
 	endRiggedFullbright();
 }
 
 void LLDrawPoolAvatar::beginRiggedGlow()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (sShaderLevel > 0)
 	{
 		if (LLPipeline::sUnderWaterRender)
@@ -1108,11 +1181,15 @@ void LLDrawPoolAvatar::beginRiggedGlow()
 
 void LLDrawPoolAvatar::endRiggedGlow()
 {
+    LL_PROFILE_ZONE_SCOPED; // explicit ';' so the statement does not depend on how the macro expands
+    // Rigged glow teardown is delegated entirely to endRiggedFullbright().
 	endRiggedFullbright();
 }
 
 void LLDrawPoolAvatar::beginRiggedFullbright()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (sShaderLevel > 0)
 	{
 		if (LLPipeline::sUnderWaterRender)
@@ -1170,6 +1247,8 @@ void LLDrawPoolAvatar::beginRiggedFullbright()
 
 void LLDrawPoolAvatar::endRiggedFullbright()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	LLVertexBuffer::unbind();
 	if (sShaderLevel > 0 || gPipeline.canUseVertexShaders())
 	{
@@ -1180,6 +1259,8 @@ void LLDrawPoolAvatar::endRiggedFullbright()
 
 void LLDrawPoolAvatar::beginRiggedShinySimple()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (sShaderLevel > 0)
 	{
 		if (LLPipeline::sUnderWaterRender)
@@ -1220,6 +1301,8 @@ void LLDrawPoolAvatar::beginRiggedShinySimple()
 
 void LLDrawPoolAvatar::endRiggedShinySimple()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	LLVertexBuffer::unbind();
 	if (sShaderLevel > 0 || gPipeline.canUseVertexShaders())
 	{
@@ -1231,6 +1314,8 @@ void LLDrawPoolAvatar::endRiggedShinySimple()
 
 void LLDrawPoolAvatar::beginRiggedFullbrightShiny()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (sShaderLevel > 0)
 	{
 		if (LLPipeline::sUnderWaterRender)
@@ -1296,6 +1381,8 @@ void LLDrawPoolAvatar::beginRiggedFullbrightShiny()
 
 void LLDrawPoolAvatar::endRiggedFullbrightShiny()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	LLVertexBuffer::unbind();
 	if (sShaderLevel > 0 || gPipeline.canUseVertexShaders())
 	{
@@ -1308,6 +1395,8 @@ void LLDrawPoolAvatar::endRiggedFullbrightShiny()
 
 void LLDrawPoolAvatar::beginDeferredRiggedSimple()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	sVertexProgram = &gDeferredSkinnedDiffuseProgram;
 	sDiffuseChannel = 0;
 	sVertexProgram->bind();
@@ -1323,6 +1412,8 @@ void LLDrawPoolAvatar::beginDeferredRiggedSimple()
 
 void LLDrawPoolAvatar::endDeferredRiggedSimple()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	LLVertexBuffer::unbind();
 	sVertexProgram->unbind();
 	sVertexProgram = NULL;
@@ -1330,6 +1421,8 @@ void LLDrawPoolAvatar::endDeferredRiggedSimple()
 
 void LLDrawPoolAvatar::beginDeferredRiggedBump()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	sVertexProgram = &gDeferredSkinnedBumpProgram;
 	sVertexProgram->bind();
     if (LLPipeline::sRenderingHUDs)
@@ -1346,6 +1439,8 @@ void LLDrawPoolAvatar::beginDeferredRiggedBump()
 
 void LLDrawPoolAvatar::endDeferredRiggedBump()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	LLVertexBuffer::unbind();
 	sVertexProgram->disableTexture(LLViewerShaderMgr::BUMP_MAP);
 	sVertexProgram->disableTexture(LLViewerShaderMgr::DIFFUSE_MAP);
@@ -1357,6 +1452,8 @@ void LLDrawPoolAvatar::endDeferredRiggedBump()
 
 void LLDrawPoolAvatar::beginDeferredRiggedMaterial(S32 pass)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (pass == 1 ||
 		pass == 5 ||
 		pass == 9 ||
@@ -1387,6 +1484,8 @@ void LLDrawPoolAvatar::beginDeferredRiggedMaterial(S32 pass)
 
 void LLDrawPoolAvatar::endDeferredRiggedMaterial(S32 pass)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (pass == 1 ||
 		pass == 5 ||
 		pass == 9 ||
@@ -1407,6 +1506,8 @@ void LLDrawPoolAvatar::endDeferredRiggedMaterial(S32 pass)
 
 void LLDrawPoolAvatar::beginDeferredSkinned()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	sShaderLevel = mShaderLevel;
 	sVertexProgram = &gDeferredAvatarProgram;
 	sRenderingSkinned = TRUE;
@@ -1428,6 +1529,8 @@ void LLDrawPoolAvatar::beginDeferredSkinned()
 
 void LLDrawPoolAvatar::endDeferredSkinned()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	// if we're in software-blending, remember to set the fence _after_ we draw so we wait till this rendering is done
 	sRenderingSkinned = FALSE;
 	sVertexProgram->unbind();
@@ -1740,6 +1843,8 @@ void LLDrawPoolAvatar::getRiggedGeometry(
     LLVolume* volume,
     const LLVolumeFace& vol_face)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     face->setGeomIndex(0);
     face->setIndicesIndex(0);
 
@@ -1794,7 +1899,7 @@ void LLDrawPoolAvatar::getRiggedGeometry(
 
 	U16 offset = 0;
 		
-	LLMatrix4 mat_vert = skin->mBindShapeMatrix;
+	LLMatrix4 mat_vert = LLMatrix4(skin->mBindShapeMatrix);
 	glh::matrix4f m((F32*) mat_vert.mMatrix);
 	m = m.inverse().transpose();
 		
@@ -1836,24 +1941,26 @@ void LLDrawPoolAvatar::getRiggedGeometry(
 void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
     LLVOAvatar* avatar,
     LLFace* face,
-    const LLMeshSkinInfo* skin,
+    const LLVOVolume* vobj,
     LLVolume* volume,
     LLVolumeFace& vol_face)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
 	LLVector4a* weights = vol_face.mWeights;
 	if (!weights)
 	{
 		return;
 	}
 
+    if (!vobj || vobj->isNoLOD())
+    {
+        return;
+    }
+
 	LLPointer<LLVertexBuffer> buffer = face->getVertexBuffer();
 	LLDrawable* drawable = face->getDrawable();
 
-	if (drawable->getVOVolume() && drawable->getVOVolume()->isNoLOD())
-	{
-		return;
-	}
-
     const U32 max_joints = LLSkinningUtil::getMaxJointCount();
 
 #if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
@@ -1893,23 +2000,26 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
     }
 #endif
 
-    // FIXME ugly const cast
-    LLSkinningUtil::scrubInvalidJoints(avatar, const_cast<LLMeshSkinInfo*>(skin));
-
-	U32 data_mask = face->getRiggedVertexBufferDataMask();
+    U32 data_mask = face->getRiggedVertexBufferDataMask();
+    const LLMeshSkinInfo* skin = nullptr;
 
-    if (!vol_face.mWeightsScrubbed)
-    {
-        LLSkinningUtil::scrubSkinWeights(weights, vol_face.mNumVertices, skin);
-        vol_face.mWeightsScrubbed = TRUE;
-    }
-	
 	if (buffer.isNull() || 
 		buffer->getTypeMask() != data_mask ||
 		buffer->getNumVerts() != vol_face.mNumVertices ||
 		buffer->getNumIndices() != vol_face.mNumIndices ||
 		(drawable && drawable->isState(LLDrawable::REBUILD_ALL)))
 	{
+        LL_PROFILE_ZONE_NAMED("Rigged VBO Rebuild");
+        skin = vobj->getSkinInfo();
+        // FIXME ugly const cast
+        LLSkinningUtil::scrubInvalidJoints(avatar, const_cast<LLMeshSkinInfo*>(skin));
+
+        if (!vol_face.mWeightsScrubbed)
+        {
+            LLSkinningUtil::scrubSkinWeights(weights, vol_face.mNumVertices, skin);
+            vol_face.mWeightsScrubbed = TRUE;
+        }
+
 		if (drawable && drawable->isState(LLDrawable::REBUILD_ALL))
 		{
             //rebuild EVERY face in the drawable, not just this one, to avoid missing drawable wide rebuild issues
@@ -1935,18 +2045,13 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
 		}
 	}
 
-	if (buffer.isNull() ||
-		buffer->getNumVerts() != vol_face.mNumVertices ||
-		buffer->getNumIndices() != vol_face.mNumIndices)
-	{
-		// Allocation failed
-		return;
-	}
-
-	if (!buffer.isNull() && 
-		sShaderLevel <= 0 && 
-		face->mLastSkinTime < avatar->getLastSkinTime())
+	if (sShaderLevel <= 0 && 
+        face->mLastSkinTime < avatar->getLastSkinTime() &&
+        !buffer.isNull() &&
+        buffer->getNumVerts() == vol_face.mNumVertices &&
+        buffer->getNumIndices() == vol_face.mNumIndices)
 	{
+        LL_PROFILE_ZONE_NAMED("Software Skinning");
 		//perform software vertex skinning for this face
 		LLStrider<LLVector3> position;
 		LLStrider<LLVector3> normal;
@@ -1963,14 +2068,16 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
 
 		LLVector4a* norm = has_normal ? (LLVector4a*) normal.get() : NULL;
 		
-		//build matrix palette
-		LLMatrix4a mat[LL_MAX_JOINTS_PER_MESH_OBJECT];
-        U32 count = LLSkinningUtil::getMeshJointCount(skin);
-        LLSkinningUtil::initSkinningMatrixPalette((LLMatrix4*)mat, count, skin, avatar);
-        LLSkinningUtil::checkSkinWeights(weights, buffer->getNumVerts(), skin);
+        if (skin == nullptr)
+        {
+            skin = vobj->getSkinInfo();
+        }
 
-		LLMatrix4a bind_shape_matrix;
-		bind_shape_matrix.loadu(skin->mBindShapeMatrix);
+        const MatrixPaletteCache& mpc = updateSkinInfoMatrixPalette(avatar, skin);
+        const LLMatrix4a* mat = &(mpc.mMatrixPalette[0]);
+
+        LLSkinningUtil::checkSkinWeights(weights, buffer->getNumVerts(), skin);
+		const LLMatrix4a& bind_shape_matrix = skin->mBindShapeMatrix;
 
 #if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
         U8* joint_indices_cursor = vol_face.mJointIndices;
@@ -2038,6 +2145,8 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
 
 void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (!avatar->shouldRenderRigged())
 	{
 		return;
@@ -2045,15 +2154,18 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 
 	stop_glerror();
 
+    const LLMeshSkinInfo* lastSkin = nullptr;
+
 	for (U32 i = 0; i < mRiggedFace[type].size(); ++i)
 	{
+        LL_PROFILE_ZONE_NAMED("Render Rigged Face");
 		LLFace* face = mRiggedFace[type][i];
 
         S32 offset = face->getIndicesStart();
 		U32 count = face->getIndicesCount();
 
         U16 start = face->getGeomStart();
-		U16 end = start + face->getGeomCount()-1;			
+		U16 end = start + face->getGeomCount()-1;
 
 		LLDrawable* drawable = face->getDrawable();
 		if (!drawable)
@@ -2175,52 +2287,32 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
         }
 
 		if (buff)
-		{        
+		{
 			if (sShaderLevel > 0)
 			{
-                // upload matrix palette to shader
-				LLMatrix4a mat[LL_MAX_JOINTS_PER_MESH_OBJECT];
-				U32 count = LLSkinningUtil::getMeshJointCount(skin);
-                LLSkinningUtil::initSkinningMatrixPalette((LLMatrix4*)mat, count, skin, avatar);
-
-				stop_glerror();
-
-				F32 mp[LL_MAX_JOINTS_PER_MESH_OBJECT*12];
-
-				for (U32 i = 0; i < count; ++i)
-				{
-					F32* m = (F32*) mat[i].mMatrix[0].getF32ptr();
-
-					U32 idx = i*12;
-
-					mp[idx+0] = m[0];
-					mp[idx+1] = m[1];
-					mp[idx+2] = m[2];
-					mp[idx+3] = m[12];
+                if (lastSkin != skin) // <== only upload matrix palette to GL if the skininfo changed
+                {
+                    // upload matrix palette to shader
+                    const MatrixPaletteCache& mpc = updateSkinInfoMatrixPalette(avatar, skin);
+                    U32 count = mpc.mMatrixPalette.size();
 
-					mp[idx+4] = m[4];
-					mp[idx+5] = m[5];
-					mp[idx+6] = m[6];
-					mp[idx+7] = m[13];
+                    stop_glerror();
 
-					mp[idx+8] = m[8];
-					mp[idx+9] = m[9];
-					mp[idx+10] = m[10];
-					mp[idx+11] = m[14];
-				}
+                    LLDrawPoolAvatar::sVertexProgram->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX,
+                        count,
+                        FALSE,
+                        (GLfloat*) &(mpc.mGLMp[0]));
 
-				LLDrawPoolAvatar::sVertexProgram->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX, 
-					count,
-					FALSE,
-					(GLfloat*) mp);
-
-				stop_glerror();
+                    stop_glerror();
+                }
 			}
 			else
 			{
 				data_mask &= ~LLVertexBuffer::MAP_WEIGHT4;
 			}
 
+            lastSkin = skin;
+
 			/*if (glow)
 			{
 				gGL.diffuseColor4f(0,0,0,face->getTextureEntry()->getGlow());
@@ -2330,16 +2422,22 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 
 void LLDrawPoolAvatar::renderDeferredRiggedSimple(LLVOAvatar* avatar)
 {
+    LL_PROFILE_ZONE_SCOPED; // explicit ';' so the statement does not depend on how the macro expands
+    // Draws the avatar's RIGGED_DEFERRED_SIMPLE faces through renderRigged().
 	renderRigged(avatar, RIGGED_DEFERRED_SIMPLE);
 }
 
 void LLDrawPoolAvatar::renderDeferredRiggedBump(LLVOAvatar* avatar)
 {
+    LL_PROFILE_ZONE_SCOPED; // explicit ';' so the statement does not depend on how the macro expands
+    // Draws the avatar's RIGGED_DEFERRED_BUMP faces through renderRigged().
 	renderRigged(avatar, RIGGED_DEFERRED_BUMP);
 }
 
 void LLDrawPoolAvatar::renderDeferredRiggedMaterial(LLVOAvatar* avatar, S32 pass)
 {
+    LL_PROFILE_ZONE_SCOPED; // explicit ';' so the statement does not depend on how the macro expands
+    // `pass` is forwarded unchanged as the rigged face type argument of renderRigged().
 	renderRigged(avatar, pass);
 }
 
@@ -2352,8 +2450,10 @@ void LLDrawPoolAvatar::updateRiggedVertexBuffers(LLVOAvatar* avatar)
 	//update rigged vertex buffers
 	for (U32 type = 0; type < NUM_RIGGED_PASSES; ++type)
 	{
+        LL_PROFILE_ZONE_NAMED("Pass");
 		for (U32 i = 0; i < mRiggedFace[type].size(); ++i)
 		{
+            LL_PROFILE_ZONE_NAMED("Face");
 			LLFace* face = mRiggedFace[type][i];
 			LLDrawable* drawable = face->getDrawable();
 			if (!drawable)
@@ -2376,43 +2476,111 @@ void LLDrawPoolAvatar::updateRiggedVertexBuffers(LLVOAvatar* avatar)
 				continue;
 			}
 
-			const LLMeshSkinInfo* skin = vobj->getSkinInfo();
-			if (!skin)
-			{
-				continue;
-			}
-
 			stop_glerror();
 
 			LLVolumeFace& vol_face = volume->getVolumeFace(te);
-			updateRiggedFaceVertexBuffer(avatar, face, skin, volume, vol_face);
+			updateRiggedFaceVertexBuffer(avatar, face, vobj, volume, vol_face);
 		}
 	}
 }
 
+void LLDrawPoolAvatar::updateSkinInfoMatrixPalettes(LLVOAvatar* avatarp)
+{
+    LL_PROFILE_ZONE_SCOPED;
+    // Drop every cached matrix palette that has gone more than 10 frames without an update.
+    const U32 max_idle_frames = 10;
+    matrix_palette_cache_t::iterator entry = mMatrixPaletteCache.begin();
+    while (entry != mMatrixPaletteCache.end())
+    {
+        if (gFrameCount - entry->second.mFrame <= max_idle_frames)
+        {
+            ++entry; // recently updated: keep it
+            continue;
+        }
+        entry = mMatrixPaletteCache.erase(entry); // erase() returns the iterator after the removed entry
+    }
+}
+
+const LLDrawPoolAvatar::MatrixPaletteCache& LLDrawPoolAvatar::updateSkinInfoMatrixPalette(LLVOAvatar * avatarp, const LLMeshSkinInfo* skin)
+{
+    // Returns the cache entry for `skin`, rebuilding its palette only if it was not already built this frame.
+    MatrixPaletteCache& entry = mMatrixPaletteCache[skin]; // operator[] creates the entry on first use
+    if (entry.mFrame != gFrameCount)
+    {
+        LL_PROFILE_ZONE_SCOPED;
+        entry.mFrame = gFrameCount;
+        //build matrix palette (data() rather than &v[0]: indexing an empty vector is undefined when count is 0)
+        U32 count = LLSkinningUtil::getMeshJointCount(skin);
+        entry.mMatrixPalette.resize(count);
+        LLSkinningUtil::initSkinningMatrixPalette(entry.mMatrixPalette.data(), count, skin, avatarp);
+
+        const LLMatrix4a* mat = entry.mMatrixPalette.data();
+
+        stop_glerror();
+        // 12 floats per joint: the form later handed to uniformMatrix3x4fv
+        entry.mGLMp.resize(count * 12);
+
+        F32* mp = entry.mGLMp.data();
+        // From each 16-float source matrix keep floats 0-2,12 / 4-6,13 / 8-10,14; floats 3, 7, 11 and 15 are dropped.
+        for (U32 i = 0; i < count; ++i)
+        {
+            F32* m = (F32*)mat[i].mMatrix[0].getF32ptr();
+
+            U32 idx = i * 12;
+
+            mp[idx + 0] = m[0];
+            mp[idx + 1] = m[1];
+            mp[idx + 2] = m[2];
+            mp[idx + 3] = m[12];
+
+            mp[idx + 4] = m[4];
+            mp[idx + 5] = m[5];
+            mp[idx + 6] = m[6];
+            mp[idx + 7] = m[13];
+
+            mp[idx + 8] = m[8];
+            mp[idx + 9] = m[9];
+            mp[idx + 10] = m[10];
+            mp[idx + 11] = m[14];
+        }
+    }
+
+    return entry;
+}
+
 void LLDrawPoolAvatar::renderRiggedSimple(LLVOAvatar* avatar)
 {
+    LL_PROFILE_ZONE_SCOPED; // explicit ';' so the statement does not depend on how the macro expands
+    // Draws the avatar's RIGGED_SIMPLE faces through renderRigged().
 	renderRigged(avatar, RIGGED_SIMPLE);
 }
 
 void LLDrawPoolAvatar::renderRiggedFullbright(LLVOAvatar* avatar)
 {
+    LL_PROFILE_ZONE_SCOPED; // explicit ';' so the statement does not depend on how the macro expands
+    // Draws the avatar's RIGGED_FULLBRIGHT faces through renderRigged().
 	renderRigged(avatar, RIGGED_FULLBRIGHT);
 }
 
 	
 void LLDrawPoolAvatar::renderRiggedShinySimple(LLVOAvatar* avatar)
 {
+    LL_PROFILE_ZONE_SCOPED; // explicit ';' so the statement does not depend on how the macro expands
+    // Draws the avatar's RIGGED_SHINY faces through renderRigged().
 	renderRigged(avatar, RIGGED_SHINY);
 }
 
 void LLDrawPoolAvatar::renderRiggedFullbrightShiny(LLVOAvatar* avatar)
 {
+    LL_PROFILE_ZONE_SCOPED; // explicit ';' so the statement does not depend on how the macro expands
+    // Draws the avatar's RIGGED_FULLBRIGHT_SHINY faces through renderRigged().
 	renderRigged(avatar, RIGGED_FULLBRIGHT_SHINY);
 }
 
 void LLDrawPoolAvatar::renderRiggedAlpha(LLVOAvatar* avatar)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (!mRiggedFace[RIGGED_ALPHA].empty())
 	{
 		LLGLEnable blend(GL_BLEND);
@@ -2430,6 +2598,8 @@ void LLDrawPoolAvatar::renderRiggedAlpha(LLVOAvatar* avatar)
 
 void LLDrawPoolAvatar::renderRiggedFullbrightAlpha(LLVOAvatar* avatar)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (!mRiggedFace[RIGGED_FULLBRIGHT_ALPHA].empty())
 	{
 		LLGLEnable blend(GL_BLEND);
@@ -2447,6 +2617,8 @@ void LLDrawPoolAvatar::renderRiggedFullbrightAlpha(LLVOAvatar* avatar)
 
 void LLDrawPoolAvatar::renderRiggedGlow(LLVOAvatar* avatar)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (!mRiggedFace[RIGGED_GLOW].empty())
 	{
 		LLGLEnable blend(GL_BLEND);
@@ -2474,6 +2646,8 @@ void LLDrawPoolAvatar::renderRiggedGlow(LLVOAvatar* avatar)
 //-----------------------------------------------------------------------------
 LLViewerTexture *LLDrawPoolAvatar::getDebugTexture()
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (mReferences.empty())
 	{
 		return NULL;
@@ -2497,6 +2671,8 @@ LLColor3 LLDrawPoolAvatar::getDebugColor() const
 
 void LLDrawPoolAvatar::addRiggedFace(LLFace* facep, U32 type)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     llassert (facep->isState(LLFace::RIGGED));
     llassert(getType() == LLDrawPool::POOL_AVATAR || getType() == LLDrawPool::POOL_CONTROL_AV);
     if (facep->getPool() && facep->getPool() != this)
@@ -2519,6 +2695,8 @@ void LLDrawPoolAvatar::addRiggedFace(LLFace* facep, U32 type)
 
 void LLDrawPoolAvatar::removeRiggedFace(LLFace* facep)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     llassert (facep->isState(LLFace::RIGGED));
     llassert(getType() == LLDrawPool::POOL_AVATAR || getType() == LLDrawPool::POOL_CONTROL_AV);
     if (facep->getPool() != this)
@@ -2556,7 +2734,7 @@ LLVertexBufferAvatar::LLVertexBufferAvatar()
 : LLVertexBuffer(sDataMask, 
 	GL_STREAM_DRAW_ARB) //avatars are always stream draw due to morph targets
 {
-
+    LL_PROFILE_ZONE_SCOPED
 }
 
 
diff --git a/indra/newview/lldrawpoolavatar.h b/indra/newview/lldrawpoolavatar.h
index 92a8538958..0c1ee2cced 100644
--- a/indra/newview/lldrawpoolavatar.h
+++ b/indra/newview/lldrawpoolavatar.h
@@ -28,14 +28,18 @@
 #define LL_LLDRAWPOOLAVATAR_H
 
 #include "lldrawpool.h"
+#include "llmodel.h"
+
+#include <unordered_map>
 
 class LLVOAvatar;
+class LLVOVolume;
 class LLGLSLShader;
 class LLFace;
-class LLMeshSkinInfo;
 class LLVolume;
 class LLVolumeFace;
 
+extern U32 gFrameCount;
 
 class LLDrawPoolAvatar : public LLFacePool
 {
@@ -253,11 +257,13 @@ typedef enum
 	void getRiggedGeometry(LLFace* face, LLPointer<LLVertexBuffer>& buffer, U32 data_mask, const LLMeshSkinInfo* skin, LLVolume* volume, const LLVolumeFace& vol_face);
 	void updateRiggedFaceVertexBuffer(LLVOAvatar* avatar,
 									  LLFace* facep, 
-									  const LLMeshSkinInfo* skin, 
+									  const LLVOVolume* vobj,
 									  LLVolume* volume,
 									  LLVolumeFace& vol_face);
 	void updateRiggedVertexBuffers(LLVOAvatar* avatar);
 
+    void updateSkinInfoMatrixPalettes(LLVOAvatar* avatarp);
+
 	void renderRigged(LLVOAvatar* avatar, U32 type, bool glow = false);
 	void renderRiggedSimple(LLVOAvatar* avatar);
 	void renderRiggedAlpha(LLVOAvatar* avatar);
@@ -277,6 +283,26 @@ typedef enum
 
 	std::vector<LLFace*> mRiggedFace[NUM_RIGGED_PASSES];
 
+    class MatrixPaletteCache // cache entry for one skin: its skinning matrices plus their GL-ready float form
+    {
+    public:
+        U32 mFrame; // value of gFrameCount when this entry was last rebuilt
+        LLMeshSkinInfo::matrix_list_t mMatrixPalette; // sized to the skin's mesh joint count on rebuild
+        
+        // Float array ready to be sent to GL (12 floats per joint)
+        std::vector<F32> mGLMp;
+
+        MatrixPaletteCache() :
+            mFrame(gFrameCount-1) // differs from the current frame, so a new entry is rebuilt on first use
+        {
+        }
+    };
+    
+    const MatrixPaletteCache& updateSkinInfoMatrixPalette(LLVOAvatar* avatarp, const LLMeshSkinInfo* skin);
+
+    typedef std::unordered_map<const LLMeshSkinInfo*, MatrixPaletteCache> matrix_palette_cache_t;
+    matrix_palette_cache_t mMatrixPaletteCache;
+
 	/*virtual*/ LLViewerTexture *getDebugTexture();
 	/*virtual*/ LLColor3 getDebugColor() const; // For AGP debug display
 
diff --git a/indra/newview/lldrawpoolsimple.cpp b/indra/newview/lldrawpoolsimple.cpp
index f211cf6e27..74e6665a96 100644
--- a/indra/newview/lldrawpoolsimple.cpp
+++ b/indra/newview/lldrawpoolsimple.cpp
@@ -199,11 +199,7 @@ void LLDrawPoolSimple::beginRenderPass(S32 pass)
 	}
 	else 
 	{
-		// don't use shaders!
-		if (gGLManager.mHasShaderObjects)
-		{
-			LLGLSLShader::bindNoShader();
-		}		
+		LLGLSLShader::bindNoShader();
 	}
 }
 
@@ -301,11 +297,7 @@ void LLDrawPoolAlphaMask::beginRenderPass(S32 pass)
 	}
 	else 
 	{
-		// don't use shaders!
-		if (gGLManager.mHasShaderObjects)
-		{
-			LLGLSLShader::bindNoShader();
-		}		
+		LLGLSLShader::bindNoShader();
 	}
 }
 
@@ -392,11 +384,7 @@ void LLDrawPoolFullbrightAlphaMask::beginRenderPass(S32 pass)
 	}
 	else 
 	{
-		// don't use shaders!
-		if (gGLManager.mHasShaderObjects)
-		{
-			LLGLSLShader::bindNoShader();
-		}		
+		LLGLSLShader::bindNoShader();
 	}
 }
 
@@ -567,11 +555,7 @@ void LLDrawPoolGrass::beginRenderPass(S32 pass)
 	else 
 	{
 		gGL.setAlphaRejectSettings(LLRender::CF_GREATER, 0.5f);
-		// don't use shaders!
-		if (gGLManager.mHasShaderObjects)
-		{
-			LLGLSLShader::bindNoShader();
-		}		
+		LLGLSLShader::bindNoShader();
 	}
 }
 
diff --git a/indra/newview/lldrawpoolsky.cpp b/indra/newview/lldrawpoolsky.cpp
index b6f55e800a..b1eefaab81 100644
--- a/indra/newview/lldrawpoolsky.cpp
+++ b/indra/newview/lldrawpoolsky.cpp
@@ -82,13 +82,7 @@ void LLDrawPoolSky::render(S32 pass)
 	}
 	else
 	{
-		// don't use shaders!
-		if (gGLManager.mHasShaderObjects)
-		{
-			// Ironically, we must support shader objects to be
-			// able to use this call.
-			LLGLSLShader::bindNoShader();
-		}
+		LLGLSLShader::bindNoShader();
 		mShader = NULL;
 	}
 	
diff --git a/indra/newview/llexperiencelog.cpp b/indra/newview/llexperiencelog.cpp
index ee5d561927..c441fbc09f 100644
--- a/indra/newview/llexperiencelog.cpp
+++ b/indra/newview/llexperiencelog.cpp
@@ -149,10 +149,6 @@ std::string LLExperienceLog::getPermissionString( const LLSD& message, const std
 		{
 			buf.str(entry);
 		}
-		else
-		{
-			buf.str();
-		}
 	}
 
 	if(buf.str().empty())
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index 4a802ad9aa..34448a780d 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -241,6 +241,8 @@ void LLFace::setPool(LLFacePool* pool)
 
 void LLFace::setPool(LLFacePool* new_pool, LLViewerTexture *texturep)
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	if (!new_pool)
 	{
 		LL_ERRS() << "Setting pool to null!" << LL_ENDL;
@@ -320,6 +322,8 @@ void LLFace::setSpecularMap(LLViewerTexture* tex)
 
 void LLFace::dirtyTexture()
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	LLDrawable* drawablep = getDrawable();
 
 	if (mVObjp.notNull() && mVObjp->getVolume())
@@ -535,6 +539,8 @@ void LLFace::updateCenterAgent()
 
 void LLFace::renderSelected(LLViewerTexture *imagep, const LLColor4& color)
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	if (mDrawablep == NULL || mDrawablep->getSpatialGroup() == NULL)
 	{
 		return;
@@ -585,6 +591,7 @@ void LLFace::renderSelected(LLViewerTexture *imagep, const LLColor4& color)
 						glTexCoordPointer(2, GL_FLOAT, 8, vol_face.mTexCoords);
 					}
 					gGL.syncMatrices();
+					LL_PROFILER_GPU_ZONEC( "gl.DrawElements", 0x00FF00 );
 					glDrawElements(GL_TRIANGLES, vol_face.mNumIndices, GL_UNSIGNED_SHORT, vol_face.mIndices);
 					glDisableClientState(GL_TEXTURE_COORD_ARRAY);
 				}
@@ -605,6 +612,8 @@ void LLFace::renderSelected(LLViewerTexture *imagep, const LLColor4& color)
 
 void renderFace(LLDrawable* drawable, LLFace *face)
 {
+	LL_PROFILE_ZONE_SCOPED;
+
     LLVOVolume* vobj = drawable->getVOVolume();
     if (vobj)
     {
@@ -891,6 +900,8 @@ bool less_than_max_mag(const LLVector4a& vec)
 BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f,
                              const LLMatrix4& mat_vert_in, BOOL global_volume)
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	//get bounding box
 	if (mDrawablep->isState(LLDrawable::REBUILD_VOLUME | LLDrawable::REBUILD_POSITION | LLDrawable::REBUILD_RIGGED))
 	{
@@ -2375,6 +2386,8 @@ F32 LLFace::getTextureVirtualSize()
 
 BOOL LLFace::calcPixelArea(F32& cos_angle_to_view_dir, F32& radius)
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	//VECTORIZE THIS
 	//get area of circle around face
 	LLVector4a center;
@@ -2654,6 +2667,8 @@ const LLMatrix4& LLFace::getRenderMatrix() const
 
 S32 LLFace::renderElements(const U16 *index_array) const
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	S32 ret = 0;
 	
 	if (isState(GLOBAL))
@@ -2673,6 +2688,8 @@ S32 LLFace::renderElements(const U16 *index_array) const
 
 S32 LLFace::renderIndexed()
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	if(mDrawablep == NULL || mDrawPoolp == NULL)
 	{
 		return 0;
@@ -2683,6 +2700,8 @@ S32 LLFace::renderIndexed()
 
 S32 LLFace::renderIndexed(U32 mask)
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	if (mVertexBuffer.isNull())
 	{
 		return 0;
diff --git a/indra/newview/llfeaturemanager.cpp b/indra/newview/llfeaturemanager.cpp
index e6bbe234b3..98c8531cd6 100644
--- a/indra/newview/llfeaturemanager.cpp
+++ b/indra/newview/llfeaturemanager.cpp
@@ -670,11 +670,7 @@ void LLFeatureManager::applyBaseMasks()
 	}
 
 	// now all those wacky ones
-	if (!gGLManager.mHasFragmentShader)
-	{
-		maskFeatures("NoPixelShaders");
-	}
-	if (!gGLManager.mHasVertexShader || !mGPUSupported)
+	if (!mGPUSupported)
 	{
 		maskFeatures("NoVertexShaders");
 	}
diff --git a/indra/newview/llfloatermodelpreview.cpp b/indra/newview/llfloatermodelpreview.cpp
index d9edd4dc30..0e54b66ea9 100644
--- a/indra/newview/llfloatermodelpreview.cpp
+++ b/indra/newview/llfloatermodelpreview.cpp
@@ -1357,31 +1357,31 @@ void LLFloaterModelPreview::clearAvatarTab()
 			}
 
 void LLFloaterModelPreview::updateAvatarTab(bool highlight_overrides)
-			{
+{
     S32 display_lod = mModelPreview->mPreviewLOD;
     if (mModelPreview->mModel[display_lod].empty())
-				{
+    {
         mSelectedJointName.clear();
         return;
-					}
+    }
 
     // Joints will be listed as long as they are listed in mAlternateBindMatrix
     // even if they are for some reason identical to defaults.
     // Todo: Are overrides always identical for all lods? They normally are, but there might be situations where they aren't.
     if (mJointOverrides[display_lod].empty())
-					{
+    {
         // populate map
         for (LLModelLoader::scene::iterator iter = mModelPreview->mScene[display_lod].begin(); iter != mModelPreview->mScene[display_lod].end(); ++iter)
-					{
+        {
             for (LLModelLoader::model_instance_list::iterator model_iter = iter->second.begin(); model_iter != iter->second.end(); ++model_iter)
-					{
+            {
                 LLModelInstance& instance = *model_iter;
                 LLModel* model = instance.mModel;
                 const LLMeshSkinInfo *skin = &model->mSkinInfo;
                 U32 joint_count = LLSkinningUtil::getMeshJointCount(skin);
                 U32 bind_count = highlight_overrides ? skin->mAlternateBindMatrix.size() : 0; // simply do not include overrides if data is not needed
                 if (bind_count > 0 && bind_count != joint_count)
-						{
+                {
                     std::ostringstream out;
                     out << "Invalid joint overrides for model " << model->getName();
                     out << ". Amount of joints " << joint_count;
@@ -1390,68 +1390,68 @@ void LLFloaterModelPreview::updateAvatarTab(bool highlight_overrides)
                     addStringToLog(out.str(), true);
                     // Disable overrides for this model
                     bind_count = 0;
-						}
+                }
                 if (bind_count > 0)
-						{
+                {
                     for (U32 j = 0; j < joint_count; ++j)
-							{
-                        const LLVector3& joint_pos = skin->mAlternateBindMatrix[j].getTranslation();
+                    {
+                        const LLVector3 joint_pos(skin->mAlternateBindMatrix[j].getTranslation());
                         LLJointOverrideData &data = mJointOverrides[display_lod][skin->mJointNames[j]];
 
                         LLJoint* pJoint = LLModelPreview::lookupJointByName(skin->mJointNames[j], mModelPreview);
                         if (pJoint)
-							{
+                        {
                             // see how voavatar uses aboveJointPosThreshold
                             if (pJoint->aboveJointPosThreshold(joint_pos))
-				{
+                            {
                                 // valid override
                                 if (data.mPosOverrides.size() > 0
                                     && (data.mPosOverrides.begin()->second - joint_pos).lengthSquared() > (LL_JOINT_TRESHOLD_POS_OFFSET * LL_JOINT_TRESHOLD_POS_OFFSET))
-					{
+                                {
                                     // File contains multiple meshes with conflicting joint offsets
                                     // preview may be incorrect, upload result might wary (depends onto
                                     // mesh_id that hasn't been generated yet).
                                     data.mHasConflicts = true;
-							}
+                                }
                                 data.mPosOverrides[model->getName()] = joint_pos;
-						}
-						else
-						{
+                            }
+                            else
+                            {
                                 // default value, it won't be accounted for by avatar
                                 data.mModelsNoOverrides.insert(model->getName());
-					}
-					}
-				}
-			}
-			else
-			{
+                            }
+                        }
+                    }
+                }
+                else
+                {
                     for (U32 j = 0; j < joint_count; ++j)
-				{				
+                    {
                         LLJointOverrideData &data = mJointOverrides[display_lod][skin->mJointNames[j]];
                         data.mModelsNoOverrides.insert(model->getName());
                     }
                 }
-			}
-		}
-	}
+            }
+        }
+    }
 
     LLPanel *panel = mTabContainer->getPanelByName("rigging_panel");
     LLScrollListCtrl *joints_list = panel->getChild<LLScrollListCtrl>("joints_list");
 
     if (joints_list->isEmpty())
-	{
+    {
         // Populate table
 
-    std::map<std::string, std::string> joint_alias_map;
+        std::map<std::string, std::string> joint_alias_map;
         mModelPreview->getJointAliases(joint_alias_map);
-    
+
         S32 conflicts = 0;
         joint_override_data_map_t::iterator joint_iter = mJointOverrides[display_lod].begin();
         joint_override_data_map_t::iterator joint_end = mJointOverrides[display_lod].end();
         while (joint_iter != joint_end)
-	{
+        {
             const std::string& listName = joint_iter->first;
-        
+
             LLScrollListItem::Params item_params;
             item_params.value(listName);
 
@@ -1459,38 +1459,38 @@ void LLFloaterModelPreview::updateAvatarTab(bool highlight_overrides)
             cell_params.font = LLFontGL::getFontSansSerif();
             cell_params.value = listName;
             if (joint_alias_map.find(listName) == joint_alias_map.end())
-	{
+            {
                 // Missing names
                 cell_params.color = LLColor4::red;
-	}
+            }
             if (joint_iter->second.mHasConflicts)
-	{
+            {
                 // Conflicts
                 cell_params.color = LLColor4::orange;
                 conflicts++;
-	}
+            }
             if (highlight_overrides && joint_iter->second.mPosOverrides.size() > 0)
-	{
+            {
                 cell_params.font.style = "BOLD";
-	}
+            }
 
             item_params.columns.add(cell_params);
 
             joints_list->addRow(item_params, ADD_BOTTOM);
             joint_iter++;
-	}
+        }
         joints_list->selectFirstItem();
         LLScrollListItem *selected = joints_list->getFirstSelected();
         if (selected)
-{
+        {
             mSelectedJointName = selected->getValue().asString();
-	}
+        }
 
         LLTextBox *joint_conf_descr = panel->getChild<LLTextBox>("conflicts_description");
         joint_conf_descr->setTextArg("[CONFLICTS]", llformat("%d", conflicts));
         joint_conf_descr->setTextArg("[JOINTS_COUNT]", llformat("%d", mJointOverrides[display_lod].size()));
-		}
-	}
+    }
+}
 
 //-----------------------------------------------------------------------------
 // addStringToLogTab()
diff --git a/indra/newview/llfloaterwebcontent.cpp b/indra/newview/llfloaterwebcontent.cpp
index 23fd6d9c8e..ceab472c55 100644
--- a/indra/newview/llfloaterwebcontent.cpp
+++ b/indra/newview/llfloaterwebcontent.cpp
@@ -159,7 +159,7 @@ LLFloater* LLFloaterWebContent::create( Params p)
 //static
 void LLFloaterWebContent::closeRequest(const std::string &uuid)
 {
-	LLFloaterWebContent* floaterp = instance_tracker_t::getInstance(uuid);
+	auto floaterp = instance_tracker_t::getInstance(uuid);
 	if (floaterp)
 	{
 		floaterp->closeFloater(false);
@@ -169,7 +169,7 @@ void LLFloaterWebContent::closeRequest(const std::string &uuid)
 //static
 void LLFloaterWebContent::geometryChanged(const std::string &uuid, S32 x, S32 y, S32 width, S32 height)
 {
-	LLFloaterWebContent* floaterp = instance_tracker_t::getInstance(uuid);
+	auto floaterp = instance_tracker_t::getInstance(uuid);
 	if (floaterp)
 	{
 		floaterp->geometryChanged(x, y, width, height);
diff --git a/indra/newview/llglsandbox.cpp b/indra/newview/llglsandbox.cpp
index 698c15bd2d..0f288e05ca 100644
--- a/indra/newview/llglsandbox.cpp
+++ b/indra/newview/llglsandbox.cpp
@@ -993,9 +993,8 @@ private:
 //-----------------------------------------------------------------------------
 F32 gpu_benchmark()
 {
-	if (!gGLManager.mHasShaderObjects || !gGLManager.mHasTimerQuery)
-	{ // don't bother benchmarking the fixed function
-      // or venerable drivers which don't support accurate timing anyway
+	if (!gGLManager.mHasTimerQuery)
+	{ // don't bother benchmarking venerable drivers which don't support accurate timing anyway
       // and are likely to be correctly identified by the GPU table already.
 		return -1.f;
 	}
diff --git a/indra/newview/llhudnametag.cpp b/indra/newview/llhudnametag.cpp
index 9d49c30a49..55a4b5a457 100644
--- a/indra/newview/llhudnametag.cpp
+++ b/indra/newview/llhudnametag.cpp
@@ -224,6 +224,7 @@ BOOL LLHUDNameTag::lineSegmentIntersect(const LLVector4a& start, const LLVector4
 
 void LLHUDNameTag::render()
 {
+    LL_PROFILE_ZONE_SCOPED;
 	if (sDisplayText)
 	{
 		LLGLDepthTest gls_depth(GL_TRUE, GL_FALSE);
@@ -731,6 +732,7 @@ void LLHUDNameTag::updateSize()
 
 void LLHUDNameTag::updateAll()
 {
+    LL_PROFILE_ZONE_SCOPED;
 	// iterate over all text objects, calculate their restoration forces,
 	// and add them to the visible set if they are on screen and close enough
 	sVisibleTextObjects.clear();
diff --git a/indra/newview/llmainlooprepeater.cpp b/indra/newview/llmainlooprepeater.cpp
deleted file mode 100644
index 6736e9a950..0000000000
--- a/indra/newview/llmainlooprepeater.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-/** 
- * @file llmachineid.cpp
- * @brief retrieves unique machine ids
- *
- * $LicenseInfo:firstyear=2009&license=viewerlgpl$
- * Second Life Viewer Source Code
- * Copyright (C) 2010, Linden Research, Inc.
- * 
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License only.
- * 
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- * 
- * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
- * $/LicenseInfo$
- */
-
-#include "llviewerprecompiledheaders.h"
-#include "llapr.h"
-#include "llevents.h"
-#include "llmainlooprepeater.h"
-
-
-
-// LLMainLoopRepeater
-//-----------------------------------------------------------------------------
-
-
-LLMainLoopRepeater::LLMainLoopRepeater(void):
-	mQueue(0)
-{
-	; // No op.
-}
-
-
-void LLMainLoopRepeater::start(void)
-{
-	if(mQueue != 0) return;
-
-	mQueue = new LLThreadSafeQueue<LLSD>(1024);
-	mMainLoopConnection = LLEventPumps::instance().
-		obtain("mainloop").listen(LLEventPump::inventName(), boost::bind(&LLMainLoopRepeater::onMainLoop, this, _1));
-	mRepeaterConnection = LLEventPumps::instance().
-		obtain("mainlooprepeater").listen(LLEventPump::inventName(), boost::bind(&LLMainLoopRepeater::onMessage, this, _1));
-}
-
-
-void LLMainLoopRepeater::stop(void)
-{
-	mMainLoopConnection.release();
-	mRepeaterConnection.release();
-
-	delete mQueue;
-	mQueue = 0;
-}
-
-
-bool LLMainLoopRepeater::onMainLoop(LLSD const &)
-{
-	LLSD message;
-	while(mQueue->tryPopBack(message)) {
-		std::string pump = message["pump"].asString();
-		if(pump.length() == 0 ) continue; // No pump.
-		LLEventPumps::instance().obtain(pump).post(message["payload"]);
-	}
-	return false;
-}
-
-
-bool LLMainLoopRepeater::onMessage(LLSD const & event)
-{
-	try {
-		mQueue->pushFront(event);
-	} catch(LLThreadSafeQueueError & e) {
-		LL_WARNS() << "could not repeat message (" << e.what() << ")" << 
-			event.asString() << LL_ENDL;
-	}
-	return false;
-}
diff --git a/indra/newview/llmainlooprepeater.h b/indra/newview/llmainlooprepeater.h
deleted file mode 100644
index 2ec3a74e4a..0000000000
--- a/indra/newview/llmainlooprepeater.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/** 
- * @file llmainlooprepeater.h
- * @brief a service for repeating messages on the main loop.
- *
- * $LicenseInfo:firstyear=2010&license=viewerlgpl$
- * Second Life Viewer Source Code
- * Copyright (C) 2010, Linden Research, Inc.
- * 
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License only.
- * 
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- * 
- * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
- * $/LicenseInfo$
- */
-
-#ifndef LL_LLMAINLOOPREPEATER_H
-#define LL_LLMAINLOOPREPEATER_H
-
-
-#include "llsd.h"
-#include "llthreadsafequeue.h"
-
-
-//
-// A service which creates the pump 'mainlooprepeater' to which any thread can
-// post a message that will be re-posted on the main loop.
-//
-// The posted message should contain two map elements: pump and payload.  The
-// pump value is a string naming the pump to which the message should be
-// re-posted.  The payload value is what will be posted to the designated pump.
-//
-class LLMainLoopRepeater:
-	public LLSingleton<LLMainLoopRepeater>
-{
-	LLSINGLETON(LLMainLoopRepeater);
-public:
-	// Start the repeater service.
-	void start(void);
-	
-	// Stop the repeater service.
-	void stop(void);
-	
-private:
-	LLTempBoundListener mMainLoopConnection;
-	LLTempBoundListener mRepeaterConnection;
-	LLThreadSafeQueue<LLSD> * mQueue;
-	
-	bool onMainLoop(LLSD const &);
-	bool onMessage(LLSD const & event);
-};
-
-
-#endif
diff --git a/indra/newview/llmeshrepository.cpp b/indra/newview/llmeshrepository.cpp
index 2c1c1191da..8ac64dbd15 100644
--- a/indra/newview/llmeshrepository.cpp
+++ b/indra/newview/llmeshrepository.cpp
@@ -4046,28 +4046,26 @@ S32 LLMeshRepository::getActualMeshLOD(const LLVolumeParams& mesh_params, S32 lo
 const LLMeshSkinInfo* LLMeshRepository::getSkinInfo(const LLUUID& mesh_id, const LLVOVolume* requesting_obj)
 {
 	LL_RECORD_BLOCK_TIME(FTM_MESH_FETCH);
+    if (mesh_id.notNull())
+    {
+        skin_map::iterator iter = mSkinMap.find(mesh_id);
+        if (iter != mSkinMap.end())
+        {
+            return &(iter->second);
+        }
 
-	if (mesh_id.notNull())
-	{
-		skin_map::iterator iter = mSkinMap.find(mesh_id);
-		if (iter != mSkinMap.end())
-		{
-			return &(iter->second);
-		}
-		
-		//no skin info known about given mesh, try to fetch it
-		{
-			LLMutexLock lock(mMeshMutex);
-			//add volume to list of loading meshes
-			skin_load_map::iterator iter = mLoadingSkins.find(mesh_id);
-			if (iter == mLoadingSkins.end())
-			{ //no request pending for this skin info
-				mPendingSkinRequests.push(mesh_id);
-			}
-			mLoadingSkins[mesh_id].insert(requesting_obj->getID());
-		}
-	}
-
+        //no skin info known about given mesh, try to fetch it
+        {
+            LLMutexLock lock(mMeshMutex);
+            //add volume to list of loading meshes
+            skin_load_map::iterator iter = mLoadingSkins.find(mesh_id);
+            if (iter == mLoadingSkins.end())
+            { //no request pending for this skin info
+                mPendingSkinRequests.push(mesh_id);
+            }
+            mLoadingSkins[mesh_id].insert(requesting_obj->getID());
+        }
+    }
 	return NULL;
 }
 
diff --git a/indra/newview/llmeshrepository.h b/indra/newview/llmeshrepository.h
index 81e49cb1d8..c1698194cb 100644
--- a/indra/newview/llmeshrepository.h
+++ b/indra/newview/llmeshrepository.h
@@ -27,6 +27,7 @@
 #ifndef LL_MESH_REPOSITORY_H
 #define LL_MESH_REPOSITORY_H
 
+#include <unordered_map>
 #include "llassettype.h"
 #include "llmodel.h"
 #include "lluuid.h"
@@ -613,7 +614,7 @@ public:
 	typedef std::map<LLVolumeParams, std::set<LLUUID> > mesh_load_map;
 	mesh_load_map mLoadingMeshes[4];
 	
-	typedef std::map<LLUUID, LLMeshSkinInfo> skin_map;
+	typedef std::unordered_map<LLUUID, LLMeshSkinInfo> skin_map;
 	skin_map mSkinMap;
 
 	typedef std::map<LLUUID, LLModel::Decomposition*> decomposition_map;
diff --git a/indra/newview/llmodelpreview.cpp b/indra/newview/llmodelpreview.cpp
index a9e80ab5da..01bddd781d 100644
--- a/indra/newview/llmodelpreview.cpp
+++ b/indra/newview/llmodelpreview.cpp
@@ -591,7 +591,7 @@ void LLModelPreview::rebuildUploadData()
                 bool upload_skinweights = fmp && fmp->childGetValue("upload_skin").asBoolean();
                 if (upload_skinweights && high_lod_model->mSkinInfo.mJointNames.size() > 0)
                 {
-                    LLQuaternion bind_rot = LLSkinningUtil::getUnscaledQuaternion(high_lod_model->mSkinInfo.mBindShapeMatrix);
+                    LLQuaternion bind_rot = LLSkinningUtil::getUnscaledQuaternion(LLMatrix4(high_lod_model->mSkinInfo.mBindShapeMatrix));
                     LLQuaternion identity;
                     if (!bind_rot.isEqualEps(identity, 0.01))
                     {
@@ -3298,7 +3298,7 @@ BOOL LLModelPreview::render()
                                 LLJoint *joint = getPreviewAvatar()->getJoint(skin->mJointNums[j]);
                                 if (joint)
                                 {
-                                    const LLVector3& jointPos = skin->mAlternateBindMatrix[j].getTranslation();
+                                    const LLVector3 jointPos(skin->mAlternateBindMatrix[j].getTranslation());
                                     if (joint->aboveJointPosThreshold(jointPos))
                                     {
                                         bool override_changed;
@@ -3340,11 +3340,10 @@ BOOL LLModelPreview::render()
                             //build matrix palette
 
                             LLMatrix4a mat[LL_MAX_JOINTS_PER_MESH_OBJECT];
-                            LLSkinningUtil::initSkinningMatrixPalette((LLMatrix4*)mat, joint_count,
+                            LLSkinningUtil::initSkinningMatrixPalette(mat, joint_count,
                                 skin, getPreviewAvatar());
 
-                            LLMatrix4a bind_shape_matrix;
-                            bind_shape_matrix.loadu(skin->mBindShapeMatrix);
+                            const LLMatrix4a& bind_shape_matrix = skin->mBindShapeMatrix;
                             U32 max_joints = LLSkinningUtil::getMaxJointCount();
                             for (U32 j = 0; j < buffer->getNumVerts(); ++j)
                             {
diff --git a/indra/newview/llnotificationofferhandler.cpp b/indra/newview/llnotificationofferhandler.cpp
index a9678b1e93..d9359d20cf 100644
--- a/indra/newview/llnotificationofferhandler.cpp
+++ b/indra/newview/llnotificationofferhandler.cpp
@@ -166,14 +166,14 @@ bool LLOfferHandler::processNotification(const LLNotificationPtr& notification)
 
 /*virtual*/ void LLOfferHandler::onChange(LLNotificationPtr p)
 {
-	LLToastNotifyPanel* panelp = LLToastNotifyPanel::getInstance(p->getID());
+	auto panelp = LLToastNotifyPanel::getInstance(p->getID());
 	if (panelp)
 	{
 		//
 		// HACK: if we're dealing with a notification embedded in IM, update it
 		// otherwise remove its toast
 		//
-		if (dynamic_cast<LLIMToastNotifyPanel*>(panelp))
+		if (dynamic_cast<LLIMToastNotifyPanel*>(panelp.get()))
 		{
 			panelp->updateNotification();
 		}
diff --git a/indra/newview/llskinningutil.cpp b/indra/newview/llskinningutil.cpp
index e02b21f036..dc12de29fb 100644
--- a/indra/newview/llskinningutil.cpp
+++ b/indra/newview/llskinningutil.cpp
@@ -35,7 +35,6 @@
 #include "llrigginginfo.h"
 
 #define DEBUG_SKINNING  LL_DEBUG
-#define MAT_USE_SSE     1
 
 void dump_avatar_and_skin_state(const std::string& reason, LLVOAvatar *avatar, const LLMeshSkinInfo *skin)
 {
@@ -120,36 +119,26 @@ void LLSkinningUtil::scrubInvalidJoints(LLVOAvatar *avatar, LLMeshSkinInfo* skin
     skin->mInvalidJointsScrubbed = true;
 }
 
-#define MAT_USE_SSE 1
-
 void LLSkinningUtil::initSkinningMatrixPalette(
-    LLMatrix4* mat,
+    LLMatrix4a* mat,
     S32 count, 
     const LLMeshSkinInfo* skin,
     LLVOAvatar *avatar)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     initJointNums(const_cast<LLMeshSkinInfo*>(skin), avatar);
+
+    LLMatrix4a world[LL_CHARACTER_MAX_ANIMATED_JOINTS];
+
     for (U32 j = 0; j < count; ++j)
     {
         S32 joint_num = skin->mJointNums[j];
-        LLJoint *joint = NULL;
-        if (joint_num >= 0 && joint_num < LL_CHARACTER_MAX_ANIMATED_JOINTS)
-        {
-            joint = avatar->getJoint(joint_num);
-        }
-        llassert(joint);
+        LLJoint *joint = avatar->getJoint(joint_num);
+
         if (joint)
         {
-#ifdef MAT_USE_SSE
-            LLMatrix4a bind, world, res;
-            bind.loadu(skin->mInvBindMatrix[j]);
-            world.loadu(joint->getWorldMatrix());
-            matMul(bind,world,res);
-            memcpy(mat[j].mMatrix,res.mMatrix,16*sizeof(float));
-#else
-            mat[j] = skin->mInvBindMatrix[j];
-            mat[j] *= joint->getWorldMatrix();
-#endif
+            world[j] = joint->getWorldMatrix4a();
         }
         else
         {
@@ -159,16 +148,27 @@ void LLSkinningUtil::initSkinningMatrixPalette(
             // rendering  should  be disabled  unless  all joints  are
             // valid.  In other  cases of  skinned  rendering, invalid
             // joints should already have  been removed during scrubInvalidJoints().
-            LL_WARNS_ONCE("Avatar") << avatar->getFullname() 
-                                    << " rigged to invalid joint name " << skin->mJointNames[j] 
-                                    << " num " << skin->mJointNums[j] << LL_ENDL;
-            LL_WARNS_ONCE("Avatar") << avatar->getFullname() 
-                                    << " avatar build state: isBuilt() " << avatar->isBuilt() 
-                                    << " mInitFlags " << avatar->mInitFlags << LL_ENDL;
+            LL_WARNS_ONCE("Avatar") << avatar->getFullname()
+                << " rigged to invalid joint name " << skin->mJointNames[j]
+                << " num " << skin->mJointNums[j] << LL_ENDL;
+            LL_WARNS_ONCE("Avatar") << avatar->getFullname()
+                << " avatar build state: isBuilt() " << avatar->isBuilt()
+                << " mInitFlags " << avatar->mInitFlags << LL_ENDL;
 #endif
             dump_avatar_and_skin_state("initSkinningMatrixPalette joint not found", avatar, skin);
         }
     }
+
+    //NOTE: pointer striders used here as a micro-optimization over vector/array lookups
+    const LLMatrix4a* invBind = &(skin->mInvBindMatrix[0]);
+    const LLMatrix4a* w = world;
+    LLMatrix4a* m = mat;
+    LLMatrix4a* end = m + count;
+
+    while (m < end)
+    {
+        matMulUnsafe(*(invBind++), *(w++), *(m++));
+    }
 }
 
 void LLSkinningUtil::checkSkinWeights(LLVector4a* weights, U32 num_vertices, const LLMeshSkinInfo* skin)
@@ -212,7 +212,7 @@ void LLSkinningUtil::scrubSkinWeights(LLVector4a* weights, U32 num_vertices, con
 
 void LLSkinningUtil::getPerVertexSkinMatrix(
     F32* weights,
-    LLMatrix4a* mat,
+    const LLMatrix4a* mat,
     bool handle_bad_scale,
     LLMatrix4a& final_mat,
     U32 max_joints)
@@ -270,6 +270,7 @@ void LLSkinningUtil::initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar)
 {
     if (!skin->mJointNumsInitialized)
     {
+        LL_PROFILE_ZONE_SCOPED;
         for (U32 j = 0; j < skin->mJointNames.size(); ++j)
         {
     #if DEBUG_SKINNING     
@@ -357,13 +358,11 @@ void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *a
                                 rig_info_tab[joint_num].setIsRiggedTo(true);
 
                                 // FIXME could precompute these matMuls.
-                                LLMatrix4a bind_shape;
-                                LLMatrix4a inv_bind;
+                                const LLMatrix4a& bind_shape = skin->mBindShapeMatrix;
+                                const LLMatrix4a& inv_bind = skin->mInvBindMatrix[joint_index];
                                 LLMatrix4a mat;
                                 LLVector4a pos_joint_space;
 
-                                bind_shape.loadu(skin->mBindShapeMatrix);
-                                inv_bind.loadu(skin->mInvBindMatrix[joint_index]);
                                 matMul(bind_shape, inv_bind, mat);
 
                                 mat.affineTransform(pos, pos_joint_space);
@@ -426,3 +425,4 @@ LLQuaternion LLSkinningUtil::getUnscaledQuaternion(const LLMatrix4& mat4)
     bind_rot.normalize();
     return bind_rot;
 }
+
diff --git a/indra/newview/llskinningutil.h b/indra/newview/llskinningutil.h
index efe7c85997..807418f983 100644
--- a/indra/newview/llskinningutil.h
+++ b/indra/newview/llskinningutil.h
@@ -42,10 +42,10 @@ namespace LLSkinningUtil
     S32 getMaxJointCount();
     U32 getMeshJointCount(const LLMeshSkinInfo *skin);
     void scrubInvalidJoints(LLVOAvatar *avatar, LLMeshSkinInfo* skin);
-    void initSkinningMatrixPalette(LLMatrix4* mat, S32 count, const LLMeshSkinInfo* skin, LLVOAvatar *avatar);
+    void initSkinningMatrixPalette(LLMatrix4a* mat, S32 count, const LLMeshSkinInfo* skin, LLVOAvatar *avatar);
     void checkSkinWeights(LLVector4a* weights, U32 num_vertices, const LLMeshSkinInfo* skin);
     void scrubSkinWeights(LLVector4a* weights, U32 num_vertices, const LLMeshSkinInfo* skin);
-    void getPerVertexSkinMatrix(F32* weights, LLMatrix4a* mat, bool handle_bad_scale, LLMatrix4a& final_mat, U32 max_joints);
+    void getPerVertexSkinMatrix(F32* weights, const LLMatrix4a* mat, bool handle_bad_scale, LLMatrix4a& final_mat, U32 max_joints);
 
     LL_FORCE_INLINE void getPerVertexSkinMatrixWithIndices(
         F32*        weights,
diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp
index efa4a7fd66..253b6b9953 100644
--- a/indra/newview/llspatialpartition.cpp
+++ b/indra/newview/llspatialpartition.cpp
@@ -503,7 +503,9 @@ LLSpatialGroup* LLSpatialGroup::getParent()
 	}
 
 BOOL LLSpatialGroup::removeObject(LLDrawable *drawablep, BOOL from_octree)
-	{
+{
+	LL_PROFILE_ZONE_SCOPED;
+
 	if(!drawablep)
 	{
 		return FALSE;
@@ -591,6 +593,8 @@ public:
 
 void LLSpatialGroup::setState(U32 state, S32 mode) 
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	llassert(state <= LLSpatialGroup::STATE_MASK);
 	
 	if (mode > STATE_MODE_SINGLE)
@@ -638,6 +642,8 @@ public:
 
 void LLSpatialGroup::clearState(U32 state, S32 mode)
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	llassert(state <= LLSpatialGroup::STATE_MASK);
 
 	if (mode > STATE_MODE_SINGLE)
@@ -724,6 +730,8 @@ void LLSpatialGroup::updateDistance(LLCamera &camera)
 
 F32 LLSpatialPartition::calcDistance(LLSpatialGroup* group, LLCamera& camera)
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	LLVector4a eye;
 	LLVector4a origin;
 	origin.load3(camera.getOrigin().mV);
@@ -815,6 +823,8 @@ F32 LLSpatialGroup::getUpdateUrgency() const
 
 BOOL LLSpatialGroup::changeLOD()
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	if (hasState(ALPHA_DIRTY | OBJECT_DIRTY))
 	{
 		//a rebuild is going to happen, update distance and LoD
@@ -907,6 +917,8 @@ void LLSpatialGroup::handleDestruction(const TreeNode* node)
 
 void LLSpatialGroup::handleChildAddition(const OctreeNode* parent, OctreeNode* child) 
 {
+	LL_PROFILE_ZONE_SCOPED;
+
 	if (child->getListenerCount() == 0)
 	{
 		new LLSpatialGroup(child, getSpatialPartition());
@@ -2700,11 +2712,17 @@ void renderPhysicsShape(LLDrawable* drawable, LLVOVolume* volume)
 			glVertexPointer(3, GL_FLOAT, 16, phys_volume->mHullPoints);
 			gGL.diffuseColor4fv(line_color.mV);
 			gGL.syncMatrices();
-			glDrawElements(GL_TRIANGLES, phys_volume->mNumHullIndices, GL_UNSIGNED_SHORT, phys_volume->mHullIndices);
+			{
+				LL_PROFILER_GPU_ZONEC( "gl.DrawElements", 0x20FF20 );
+				glDrawElements(GL_TRIANGLES, phys_volume->mNumHullIndices, GL_UNSIGNED_SHORT, phys_volume->mHullIndices);
+			}
 			
 			gGL.diffuseColor4fv(color.mV);
 			glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
-			glDrawElements(GL_TRIANGLES, phys_volume->mNumHullIndices, GL_UNSIGNED_SHORT, phys_volume->mHullIndices);			
+			{
+				LL_PROFILER_GPU_ZONEC( "gl.DrawElements", 0x40FF40 );
+				glDrawElements(GL_TRIANGLES, phys_volume->mNumHullIndices, GL_UNSIGNED_SHORT, phys_volume->mHullIndices);
+			}
 		}
 		else
 		{
@@ -3222,6 +3240,7 @@ void renderRaycast(LLDrawable* drawablep)
 						gGL.diffuseColor4f(0,1,1,0.5f);
 						glVertexPointer(3, GL_FLOAT, sizeof(LLVector4a), face.mPositions);
 						gGL.syncMatrices();
+						LL_PROFILER_GPU_ZONEC( "gl.DrawElements", 0x60FF60 );
 						glDrawElements(GL_TRIANGLES, face.mNumIndices, GL_UNSIGNED_SHORT, face.mIndices);
 					}
 					
diff --git a/indra/newview/llstartup.cpp b/indra/newview/llstartup.cpp
index 1242131534..13e7fcb6e4 100644
--- a/indra/newview/llstartup.cpp
+++ b/indra/newview/llstartup.cpp
@@ -179,7 +179,6 @@
 #include "pipeline.h"
 #include "llappviewer.h"
 #include "llfasttimerview.h"
-#include "lltelemetry.h"
 #include "llfloatermap.h"
 #include "llweb.h"
 #include "llvoiceclient.h"
@@ -206,6 +205,9 @@
 
 #include "llstacktrace.h"
 
+#include "threadpool.h"
+
+
 #if LL_WINDOWS
 #include "lldxhardware.h"
 #endif
@@ -302,6 +304,18 @@ void callback_cache_name(const LLUUID& id, const std::string& full_name, bool is
 // local classes
 //
 
+// Start the shared "General" worker pool, sized from the ThreadPoolSizes["General"] setting.
+void launchThreadPool()
+{
+    LLSD poolSizes{ gSavedSettings.getLLSD("ThreadPoolSizes") };
+    LLSD sizeSpec{ poolSizes["General"] };
+    // Fall back to 3 threads when the setting is missing, non-integer, or < 1 (not a usable count).
+    LLSD::Integer size{ (sizeSpec.isInteger() && sizeSpec.asInteger() > 0)? sizeSpec.asInteger() : 3 };
+    LL_DEBUGS("ThreadPool") << "Instantiating General pool with " << size << " threads" << LL_ENDL;
+    // Function-static ThreadPool: static duration, but instantiated only on demand (first call).
+    static LL::ThreadPool pool("General", size);
+}
+
 void update_texture_fetch()
 {
 	LLAppViewer::getTextureCache()->update(1); // unpauses the texture cache thread
@@ -530,8 +544,6 @@ bool idle_startup()
 			}
 
 			#if LL_WINDOWS
-                LLPROFILE_STARTUP();
-
 				// On the windows dev builds, unpackaged, the message.xml file will 
 				// be located in indra/build-vc**/newview/<config>/app_settings.
 				std::string message_path = gDirUtilp->getExpandedFilename(LL_PATH_APP_SETTINGS,"message.xml");
@@ -1492,6 +1504,9 @@ bool idle_startup()
 		gAgentCamera.resetCamera();
 		display_startup();
 
+		// start up the ThreadPool we'll use for textures et al.
+		launchThreadPool();
+
 		// Initialize global class data needed for surfaces (i.e. textures)
 		LL_DEBUGS("AppInit") << "Initializing sky..." << LL_ENDL;
 		// Initialize all of the viewer object classes for the first time (doing things like texture fetches.
diff --git a/indra/newview/llsurface.cpp b/indra/newview/llsurface.cpp
index cb356726e6..ea36e1d7be 100644
--- a/indra/newview/llsurface.cpp
+++ b/indra/newview/llsurface.cpp
@@ -1214,6 +1214,7 @@ F32 LLSurface::getWaterHeight() const
 BOOL LLSurface::generateWaterTexture(const F32 x, const F32 y,
 									 const F32 width, const F32 height)
 {
+	LL_PROFILE_ZONE_SCOPED
 	if (!getWaterTexture())
 	{
 		return FALSE;
diff --git a/indra/newview/llsurfacepatch.cpp b/indra/newview/llsurfacepatch.cpp
index 5e056944e9..aeefcd6fb8 100644
--- a/indra/newview/llsurfacepatch.cpp
+++ b/indra/newview/llsurfacepatch.cpp
@@ -728,6 +728,7 @@ BOOL LLSurfacePatch::updateTexture()
 
 void LLSurfacePatch::updateGL()
 {
+	LL_PROFILE_ZONE_SCOPED
 	F32 meters_per_grid = getSurface()->getMetersPerGrid();
 	F32 grids_per_patch_edge = (F32)getSurface()->getGridsPerPatchEdge();
 
diff --git a/indra/newview/lltelemetry.cpp b/indra/newview/lltelemetry.cpp
deleted file mode 100644
index 0c63e2fede..0000000000
--- a/indra/newview/lltelemetry.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
- /**
- * @file lltelemetry.cpp
- * @brief Wrapper for Rad Game Tools Telemetry
- *
- * $LicenseInfo:firstyear=2020&license=viewerlgpl$
- * Second Life Viewer Source Code
- * Copyright (C) 2020, Linden Research, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License only.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- *
- * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
- * $/LicenseInfo$
- */
-
-#include "llviewerprecompiledheaders.h"
-
-#include "lltelemetry.h"
-
-#if LLPROFILE_USE_RAD_TELEMETRY_PROFILER
-    #if LL_WINDOWS
-        #include "llwin32headers.h"
-
-        // build-vc120-64\packages\lib\release
-        // build-vc150-64\packages\lib\release
-        #ifdef _MSC_VER
-            #pragma comment(lib,"rad_tm_win64.lib")
-        #else
-            #pragma message "NOTE: Rad GameTools Telemetry requested but non-MSVC compiler not yet supported on Windows"
-        #endif
-    #endif // LL_WINDOWS
-
-    #if LL_DARWIN
-        #pragma message "NOTE: Rad Game Tools Telemetry requested but not yet supported on Darwin"
-    #endif
-
-    #if LL_LINUX
-        #pragma message "NOTE: Rad Game Tools Telemetry requested but not yet supported on Linux"
-    #endif
-
-//
-// local consts
-//
-static const tm_int32 TELEMETRY_BUFFER_SIZE  = 8 * 1024 * 1024;
-
-//
-// local globals
-//
-static char *gTelemetryBufferPtr = NULL; // Telemetry
-
-static const char *tm_status[ TMERR_INIT_NETWORKING_FAILED + 1 ] =
-{
-      "Telemetry pass: connected"                       // TM_OK
-    , "Telemetry FAIL: disabled via #define NTELEMETRY" // TMERR_DISABLED
-    , "Telemetry FAIL: invalid paramater"               // TMERR_INVALID_PARAM
-    , "Telemetry FAIL: DLL not found"                   // TMERR_NULL_API
-    , "Telemetry FAIL: out of resources"                // TMERR_OUT_OF_RESOURCES
-    , "Telemetry FAIL: tmInitialize() not called"       // TMERR_UNINITIALIZED
-    , "Telemetry FAIL: bad hostname"                    // TMERR_BAD_HOSTNAME
-    , "Telemetry FAIL: couldn't connect to server"      // TMERR_COULD_NOT_CONNECT
-    , "Telemetry FAIL: unknown network error"           // TMERR_UNKNOWN_NETWORK
-    , "Telemetry FAIL: tmShutdown() already called"     // TMERR_ALREADY_SHUTDOWN
-    , "Telemetry FAIL: memory buffer too small"         // TMERR_ARENA_TOO_SMALL
-    , "Telemetry FAIL: server handshake error"          // TMERR_BAD_HANDSHAKE
-    , "Telemetry FAIL: unaligned parameters"            // TMERR_UNALIGNED
-    , "Telemetry FAIL: network not initialized"         // TMERR_NETWORK_NOT_INITIALIZED -- WSAStartup not called before tmOpen()
-    , "Telemetry FAIL: bad version"                     // TMERR_BAD_VERSION
-    , "Telemetry FAIL: timer too large"                 // TMERR_BAD_TIMER
-    , "Telemetry FAIL: tmOpen() already called"         // TMERR_ALREADY_OPENED
-    , "Telemetry FAIL: tmInitialize() already called"   // TMERR_ALREADY_INITIALIZED
-    , "Telemetry FAIL: could't open file"               // TMERR_FILE_OPEN_FAILED
-    , "Telemetry FAIL: tmOpen() failed networking"      // TMERR_INIT_NETWORKING_FAILED
-};
-
-//
-// exported functionality
-//
-
-void telemetry_shutdown()
-{
-    #if LL_WINDOWS
-        if (gTelemetryBufferPtr)
-        {
-            tmClose(0);
-            tmShutdown();
-
-            delete[] gTelemetryBufferPtr;
-            gTelemetryBufferPtr = NULL;
-        }
-    #endif
-}
-
-void telemetry_startup()
-{
-    #if LL_WINDOWS
-        tmLoadLibrary(TM_RELEASE); // Loads .dll
-
-        gTelemetryBufferPtr = new char[ TELEMETRY_BUFFER_SIZE ];
-        tmInitialize(TELEMETRY_BUFFER_SIZE, gTelemetryBufferPtr);
-
-        tm_error telemetry_status = tmOpen(
-            0,                     // unused
-            "SecondLife",          // app name
-            __DATE__ " " __TIME__, // build identifier
-            "localhost",           // server name (or filename)
-            TMCT_TCP,              // connection type (or TMCT_FILE)
-            4719,                  // port
-            TMOF_INIT_NETWORKING,  // open flags
-            250 );                 // timeout ms
-
-        if (telemetry_status == TMERR_UNKNOWN)
-        {
-            LL_ERRS() << "Telemetry FAIL: unknown error" << LL_ENDL;
-        }
-        else if (telemetry_status && (telemetry_status <= TMERR_INIT_NETWORKING_FAILED))
-        {
-            LL_INFOS() << tm_status[ telemetry_status ] << LL_ENDL;
-            free(gTelemetryBufferPtr);
-            gTelemetryBufferPtr = NULL;
-        }
-    #endif // LL_WINDOWS
-}
-
-// Called after we render a frame
-void telemetry_update()
-{
-    #if LL_WINDOWS
-        if (gTelemetryBufferPtr)
-        {
-            tmTick(0);
-        }
-    #endif
-}
-#endif // LLPROFILE_USE_RAD_TELEMETRY_PROFILER
diff --git a/indra/newview/lltelemetry.h b/indra/newview/lltelemetry.h
deleted file mode 100644
index a73e5fcfa2..0000000000
--- a/indra/newview/lltelemetry.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- * @file lltelemetry.h
- * @brief Wrapper for Rad Game Tools Telemetry
- *
- * $LicenseInfo:firstyear=2020&license=viewerlgpl$
- * Second Life Viewer Source Code
- * Copyright (C) 2020, Linden Research, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License only.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- *
- * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
- * $/LicenseInfo$
- */
-
-/*
-To use:
-
-1. Uncomment #define LLPROFILE_USE_RAD_TELEMETRY_PROFILER below
-
-2. Include this header file
-    #include "lltelemetry.h"
-
-3. Add zones to the functions you wish to profile
-    void onFoo()
-    {
-        LLPROFILE_ZONE("Foo");
-    }
-*/
-//#define LLPROFILE_USE_RAD_TELEMETRY_PROFILER 1
-
-// Default NO local telemetry profiling
-#ifndef LLPROFILE_USE_RAD_TELEMETRY_PROFILER
-    #define LLPROFILE_USE_RAD_TELEMETRY_PROFILER 0
-    #define LLPROFILE_SHUTDOWN( ...) {}
-    #define LLPROFILE_STARTUP(  ...) {}
-    #define LLPROFILE_UPDATE(   ...) {}
-
-    #define LLPROFILE_AUTO_CPU_MARKER_COLOR(r, g, b)
-    #define LLPROFILE_ENTER(name)
-    #define LLPROFILE_ENTER_FORMAT(format, ...)
-    #define LLPROFILE_FUNCTION
-    #define LLPROFILE_LEAVE()
-    #define LLPROFILE_THREAD_NAME(name)
-    #define LLPROFILE_ZONE(name)
-    #define LLPROFILE_ZONE_FORMAT(format, ...)
-#else
-    #include <rad_tm.h>
-
-    #define LLPROFILE_SHUTDOWN                       telemetry_shutdown
-    #define LLPROFILE_STARTUP                        telemetry_startup
-    #define LLPROFILE_UPDATE                         telemetry_update
-
-    #define LLPROFILE_AUTO_CPU_MARKER_COLOR(r, g, b) tmZoneColor(r, g, b)
-    #define LLPROFILE_ENTER(name)                    tmEnter(0, 0, name)
-    #define LLPROFILE_ENTER_FORMAT(format, ...)      tmEnter(0, 0, format, __VA_ARGS__)
-    #define LLPROFILE_FUNCTION                       tmFunction(0, 0)
-    #define LLPROFILE_LEAVE()                        tmLeave(0)
-    #define LLPROFILE_THREAD_NAME(name)              tmThreadName(0, 0, name)
-    #define LLPROFILE_ZONE(name)                     tmZone(0, 0, name)
-    #define LLPROFILE_ZONE_FORMAT(format, ...)       tmZone(0, 0, format, __VA_ARGS__)
-#endif // LLPROFILE_USE_RAD_TELEMETRY_PROFILER
-
-//
-// exported functionality
-//
-
-extern void telemetry_startup();
-extern void telemetry_shutdown();
-extern void telemetry_update(); // called after every frame update
diff --git a/indra/newview/llviewercontrollistener.cpp b/indra/newview/llviewercontrollistener.cpp
index 3443bb644a..8820f9ec56 100644
--- a/indra/newview/llviewercontrollistener.cpp
+++ b/indra/newview/llviewercontrollistener.cpp
@@ -127,7 +127,7 @@ struct Info
 
 	LLEventAPI::Response response;
 	std::string groupname;
-	LLControlGroup* group;
+	LLControlGroup::ptr_t group;
 	std::string key;
 	LLControlVariable* control;
 };
@@ -187,7 +187,7 @@ void LLViewerControlListener::groups(LLSD const & request)
 
 struct CollectVars: public LLControlGroup::ApplyFunctor
 {
-	CollectVars(LLControlGroup* g):
+	CollectVars(LLControlGroup::ptr_t g):
 		mGroup(g)
 	{}
 
@@ -200,7 +200,7 @@ struct CollectVars: public LLControlGroup::ApplyFunctor
 					("comment", control->getComment()));
 	}
 
-	LLControlGroup* mGroup;
+	LLControlGroup::ptr_t mGroup;
 	LLSD vars;
 };
 
@@ -210,7 +210,7 @@ void LLViewerControlListener::vars(LLSD const & request)
 	// control name.
 	Response response(LLSD(), request);
 	std::string groupname(request["group"]);
-	LLControlGroup* group(LLControlGroup::getInstance(groupname));
+	auto group(LLControlGroup::getInstance(groupname));
 	if (! group)
 	{
 		return response.error(STRINGIZE("Unrecognized group '" << groupname << "'"));
diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp
index 109dc93261..33842497d1 100644
--- a/indra/newview/llviewerdisplay.cpp
+++ b/indra/newview/llviewerdisplay.cpp
@@ -208,9 +208,11 @@ void display_update_camera()
 // Write some stats to LL_INFOS()
 void display_stats()
 {
+	LL_PROFILE_ZONE_SCOPED
 	F32 fps_log_freq = gSavedSettings.getF32("FPSLogFrequency");
 	if (fps_log_freq > 0.f && gRecentFPSTime.getElapsedTimeF32() >= fps_log_freq)
 	{
+		LL_PROFILE_ZONE_NAMED("DS - FPS");
 		F32 fps = gRecentFrameCount / fps_log_freq;
 		LL_INFOS() << llformat("FPS: %.02f", fps) << LL_ENDL;
 		gRecentFrameCount = 0;
@@ -219,6 +221,7 @@ void display_stats()
 	F32 mem_log_freq = gSavedSettings.getF32("MemoryLogFrequency");
 	if (mem_log_freq > 0.f && gRecentMemoryTime.getElapsedTimeF32() >= mem_log_freq)
 	{
+		LL_PROFILE_ZONE_NAMED("DS - Memory");
 		gMemoryAllocated = U64Bytes(LLMemory::getCurrentRSS());
 		U32Megabytes memory = gMemoryAllocated;
 		LL_INFOS() << "MEMORY: " << memory << LL_ENDL;
@@ -228,6 +231,7 @@ void display_stats()
     F32 asset_storage_log_freq = gSavedSettings.getF32("AssetStorageLogFrequency");
     if (asset_storage_log_freq > 0.f && gAssetStorageLogTime.getElapsedTimeF32() >= asset_storage_log_freq)
     {
+		LL_PROFILE_ZONE_NAMED("DS - Asset Storage");
         gAssetStorageLogTime.reset();
         gAssetStorage->logAssetStorageInfo();
     }
@@ -630,6 +634,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
 	
 	if (!gDisconnected)
 	{
+		LL_PROFILE_ZONE_NAMED("display - 1");
 		LLAppViewer::instance()->pingMainloopTimeout("Display:Update");
 		if (gPipeline.hasRenderType(LLPipeline::RENDER_TYPE_HUD))
 		{ //don't draw hud objects in this frame
@@ -722,6 +727,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
 		LLAppViewer::instance()->pingMainloopTimeout("Display:Swap");
 		
 		{ 
+			LL_PROFILE_ZONE_NAMED("display - 2")
 			if (gResizeScreenTexture)
 			{
 				gResizeScreenTexture = FALSE;
@@ -777,6 +783,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
 
 		//if (!for_snapshot)
 		{
+			LL_PROFILE_ZONE_NAMED("display - 3")
 			LLAppViewer::instance()->pingMainloopTimeout("Display:Imagery");
 			gPipeline.generateWaterReflection(*LLViewerCamera::getInstance());
 			gPipeline.generateHighlight(*LLViewerCamera::getInstance());
@@ -825,7 +832,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
 				LLImageGL::deleteDeadTextures();
 				stop_glerror();
 			}*/
-			}
+		}
 
 		LLGLState::checkStates();
 		LLGLState::checkClientArrays();
@@ -840,6 +847,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
 		//
 		LLAppViewer::instance()->pingMainloopTimeout("Display:StateSort");
 		{
+			LL_PROFILE_ZONE_NAMED("display - 4")
 			LLViewerCamera::sCurCameraID = LLViewerCamera::CAMERA_WORLD;
 			gPipeline.stateSort(*LLViewerCamera::getInstance(), result);
 			stop_glerror();
@@ -948,6 +956,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
 		if (!(LLAppViewer::instance()->logoutRequestSent() && LLAppViewer::instance()->hasSavedFinalSnapshot())
 				&& !gRestoreGL)
 		{
+			LL_PROFILE_ZONE_NAMED("display - 5")
 			LLViewerCamera::sCurCameraID = LLViewerCamera::CAMERA_WORLD;
 
 			if (gSavedSettings.getBOOL("RenderDepthPrePass") && LLGLSLShader::sNoFixedFunction)
@@ -1259,7 +1268,7 @@ bool setup_hud_matrices(const LLRect& screen_region)
 
 void render_ui(F32 zoom_factor, int subfield)
 {
-    LL_RECORD_BLOCK_TIME(FTM_RENDER_UI);
+	LL_RECORD_BLOCK_TIME(FTM_RENDER_UI);
 
 	LLGLState::checkStates();
 	
@@ -1274,7 +1283,7 @@ void render_ui(F32 zoom_factor, int subfield)
 	
 	if(LLSceneMonitor::getInstance()->needsUpdate())
 	{
-        LL_RECORD_BLOCK_TIME(FTM_RENDER_UI_SCENE_MON);
+		LL_RECORD_BLOCK_TIME(FTM_RENDER_UI_SCENE_MON);
 		gGL.pushMatrix();
 		gViewerWindow->setup2DRender();
 		LLSceneMonitor::getInstance()->compare();
@@ -1282,55 +1291,64 @@ void render_ui(F32 zoom_factor, int subfield)
 		gGL.popMatrix();
 	}
 
-    // Finalize scene
-    gPipeline.renderFinalize();
+	// Finalize scene
+	gPipeline.renderFinalize();
 
-    LL_RECORD_BLOCK_TIME(FTM_RENDER_HUD);
-    render_hud_elements();
-	render_hud_attachments();
-
-	LLGLSDefault gls_default;
-	LLGLSUIDefault gls_ui;
 	{
-		gPipeline.disableLights();
-	}
+		// SL-15709
+		// NOTE: Tracy only allows one ZoneScoped per scope (not just one per function).
+		// Solutions are:
+		// 1. Use a new scope
+		// 2. Use named zones
+		// 3. Use transient zones
+		LL_RECORD_BLOCK_TIME(FTM_RENDER_HUD);
+		render_hud_elements();
+		render_hud_attachments();
 
-	{
-		gGL.color4f(1,1,1,1);
-		if (gPipeline.hasRenderDebugFeatureMask(LLPipeline::RENDER_DEBUG_FEATURE_UI))
+		LLGLSDefault gls_default;
+		LLGLSUIDefault gls_ui;
 		{
-			if (!gDisconnected)
+			gPipeline.disableLights();
+		}
+
+		{
+			gGL.color4f(1,1,1,1);
+			if (gPipeline.hasRenderDebugFeatureMask(LLPipeline::RENDER_DEBUG_FEATURE_UI))
 			{
-                LL_RECORD_BLOCK_TIME(FTM_RENDER_UI_3D);
-				render_ui_3d();
+				if (!gDisconnected)
+				{
+					LL_RECORD_BLOCK_TIME(FTM_RENDER_UI_3D);
+					render_ui_3d();
+					LLGLState::checkStates();
+				}
+				else
+				{
+					render_disconnected_background();
+				}
+
+				LL_RECORD_BLOCK_TIME(FTM_RENDER_UI_2D);
+				render_ui_2d();
 				LLGLState::checkStates();
 			}
-			else
+			gGL.flush();
+
 			{
-				render_disconnected_background();
+				LL_RECORD_BLOCK_TIME(FTM_RENDER_UI_DEBUG_TEXT);
+				gViewerWindow->setup2DRender();
+				gViewerWindow->updateDebugText();
+				gViewerWindow->drawDebugText();
 			}
 
-            LL_RECORD_BLOCK_TIME(FTM_RENDER_UI_2D);
-			render_ui_2d();
-			LLGLState::checkStates();
+			LLVertexBuffer::unbind();
 		}
-		gGL.flush();
 
+		if (!gSnapshot)
 		{
-            LL_RECORD_BLOCK_TIME(FTM_RENDER_UI_DEBUG_TEXT);
-			gViewerWindow->setup2DRender();
-			gViewerWindow->updateDebugText();
-			gViewerWindow->drawDebugText();
+			set_current_modelview(saved_view);
+			gGL.popMatrix();
 		}
 
-		LLVertexBuffer::unbind();
-	}
-
-	if (!gSnapshot)
-	{
-		set_current_modelview(saved_view);
-		gGL.popMatrix();
-	}
+	} // Tracy integration
 }
 
 static LLTrace::BlockTimerStatHandle FTM_SWAP("Swap");
diff --git a/indra/newview/llviewermessage.cpp b/indra/newview/llviewermessage.cpp
index 39c891c9c1..94d2d216b9 100644
--- a/indra/newview/llviewermessage.cpp
+++ b/indra/newview/llviewermessage.cpp
@@ -3978,8 +3978,8 @@ void process_sim_stats(LLMessageSystem *msg, void **user_data)
 		F32 stat_value;
 		msg->getU32("Stat", "StatID", stat_id, i);
 		msg->getF32("Stat", "StatValue", stat_value, i);
-		LLStatViewer::SimMeasurementSampler* measurementp = LLStatViewer::SimMeasurementSampler::getInstance((ESimStatID)stat_id);
-		
+		auto measurementp = LLStatViewer::SimMeasurementSampler::getInstance((ESimStatID)stat_id);
+
 		if (measurementp )
 		{
 			measurementp->sample(stat_value);
diff --git a/indra/newview/llviewerobjectlist.cpp b/indra/newview/llviewerobjectlist.cpp
index 63e48d1dd0..0b20556104 100644
--- a/indra/newview/llviewerobjectlist.cpp
+++ b/indra/newview/llviewerobjectlist.cpp
@@ -168,6 +168,8 @@ U64 LLViewerObjectList::getIndex(const U32 local_id,
 
 BOOL LLViewerObjectList::removeFromLocalIDTable(const LLViewerObject* objectp)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if(objectp && objectp->getRegion())
 	{
 		U32 local_id = objectp->mLocalID;		
@@ -303,6 +305,8 @@ static LLTrace::BlockTimerStatHandle FTM_PROCESS_OBJECTS("Process Objects");
 
 LLViewerObject* LLViewerObjectList::processObjectUpdateFromCache(LLVOCacheEntry* entry, LLViewerRegion* regionp)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	LLDataPacker *cached_dpp = entry->getDP();
 
 	if (!cached_dpp)
@@ -848,6 +852,8 @@ static LLTrace::BlockTimerStatHandle FTM_IDLE_COPY("Idle Copy");
 
 void LLViewerObjectList::update(LLAgent &agent)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	// Update globals
 	LLViewerObject::setVelocityInterpolate( gSavedSettings.getBOOL("VelocityInterpolate") );
 	LLViewerObject::setPingInterpolate( gSavedSettings.getBOOL("PingInterpolate") );
@@ -1293,6 +1299,8 @@ void LLViewerObjectList::clearDebugText()
 
 void LLViewerObjectList::cleanupReferences(LLViewerObject *objectp)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	bool new_dead_object = true;
 	if (mDeadObjects.find(objectp->mID) != mDeadObjects.end())
 	{
@@ -1523,6 +1531,8 @@ void LLViewerObjectList::removeFromActiveList(LLViewerObject* objectp)
 
 void LLViewerObjectList::updateActive(LLViewerObject *objectp)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (objectp->isDead())
 	{
 		return; // We don't update dead objects!
@@ -1843,6 +1853,8 @@ void LLViewerObjectList::renderObjectBounds(const LLVector3 &center)
 
 void LLViewerObjectList::generatePickList(LLCamera &camera)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 		LLViewerObject *objectp;
 		S32 i;
 		// Reset all of the GL names to zero.
@@ -2104,6 +2116,8 @@ LLViewerObject *LLViewerObjectList::replaceObject(const LLUUID &id, const LLPCod
 
 S32 LLViewerObjectList::findReferences(LLDrawable *drawablep) const
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	LLViewerObject *objectp;
 	S32 num_refs = 0;
 	
@@ -2167,6 +2181,8 @@ void LLViewerObjectList::orphanize(LLViewerObject *childp, U32 parent_id, U32 ip
 
 void LLViewerObjectList::findOrphans(LLViewerObject* objectp, U32 ip, U32 port)
 {
+	LL_PROFILE_ZONE_SCOPED
+
 	if (objectp->isDead())
 	{
 		LL_WARNS() << "Trying to find orphans for dead obj " << objectp->mID 
diff --git a/indra/newview/llviewerparceloverlay.cpp b/indra/newview/llviewerparceloverlay.cpp
index 7c3dd00e1a..02f7bbeed8 100644
--- a/indra/newview/llviewerparceloverlay.cpp
+++ b/indra/newview/llviewerparceloverlay.cpp
@@ -847,6 +847,7 @@ void LLViewerParcelOverlay::setDirty()
 
 void LLViewerParcelOverlay::updateGL()
 {
+	LL_PROFILE_ZONE_SCOPED
 	updateOverlayTexture();
 }
 
diff --git a/indra/newview/llviewershadermgr.cpp b/indra/newview/llviewershadermgr.cpp
index be5c22e7c3..7dcf29eb75 100644
--- a/indra/newview/llviewershadermgr.cpp
+++ b/indra/newview/llviewershadermgr.cpp
@@ -384,7 +384,7 @@ void LLViewerShaderMgr::initAttribsAndUniforms(void)
 
 S32 LLViewerShaderMgr::getShaderLevel(S32 type)
 {
-	return LLPipeline::sDisableShaders ? 0 : mShaderLevel[type];
+	return mShaderLevel[type];
 }
 
 //============================================================================
@@ -400,15 +400,6 @@ void LLViewerShaderMgr::setShaders()
         return;
     }
 
-    if (!gGLManager.mHasShaderObjects
-        || !gGLManager.mHasVertexShader
-        || !gGLManager.mHasFragmentShader)
-    {
-        // Viewer will show 'hardware requirements' warning later
-        LL_INFOS("ShaderLoading") << "Shaders not supported" << LL_ENDL;
-        return;
-    }
-
     static LLCachedControl<U32> max_texture_index(gSavedSettings, "RenderMaxTextureIndex", 16);
     LLGLSLShader::sIndexedTextureChannels = llmax(llmin(gGLManager.mNumTextureImageUnits, (S32) max_texture_index), 1);
 
diff --git a/indra/newview/llviewertexture.cpp b/indra/newview/llviewertexture.cpp
index ca01bb46aa..af55f68cd2 100644
--- a/indra/newview/llviewertexture.cpp
+++ b/indra/newview/llviewertexture.cpp
@@ -208,6 +208,7 @@ void  LLViewerTextureManager::findTextures(const LLUUID& id, std::vector<LLViewe
 
 LLViewerFetchedTexture* LLViewerTextureManager::findFetchedTexture(const LLUUID& id, S32 tex_type)
 {
+    LL_PROFILE_ZONE_SCOPED;
     return gTextureList.findImage(id, (ETexListType)tex_type);
 }
 
diff --git a/indra/newview/llviewertexturelist.cpp b/indra/newview/llviewertexturelist.cpp
index 561319ca5d..12495078e9 100644
--- a/indra/newview/llviewertexturelist.cpp
+++ b/indra/newview/llviewertexturelist.cpp
@@ -620,6 +620,7 @@ void LLViewerTextureList::findTexturesByID(const LLUUID &image_id, std::vector<L
 
 LLViewerFetchedTexture *LLViewerTextureList::findImage(const LLTextureKey &search_key)
 {
+    LL_PROFILE_ZONE_SCOPED;
     uuid_map_t::iterator iter = mUUIDMap.find(search_key);
     if (iter == mUUIDMap.end())
         return NULL;
diff --git a/indra/newview/llviewerwindow.cpp b/indra/newview/llviewerwindow.cpp
index 1d13a306ef..2157585364 100644
--- a/indra/newview/llviewerwindow.cpp
+++ b/indra/newview/llviewerwindow.cpp
@@ -5280,6 +5280,7 @@ void LLViewerWindow::setup3DRender()
 
 void LLViewerWindow::setup3DViewport(S32 x_offset, S32 y_offset)
 {
+	LL_PROFILE_ZONE_SCOPED
 	gGLViewport[0] = mWorldViewRectRaw.mLeft + x_offset;
 	gGLViewport[1] = mWorldViewRectRaw.mBottom + y_offset;
 	gGLViewport[2] = mWorldViewRectRaw.getWidth();
diff --git a/indra/newview/llvlcomposition.cpp b/indra/newview/llvlcomposition.cpp
index c63c5f6b23..46beac8255 100644
--- a/indra/newview/llvlcomposition.cpp
+++ b/indra/newview/llvlcomposition.cpp
@@ -254,6 +254,7 @@ BOOL LLVLComposition::generateComposition()
 BOOL LLVLComposition::generateTexture(const F32 x, const F32 y,
 									  const F32 width, const F32 height)
 {
+	LL_PROFILE_ZONE_SCOPED
 	llassert(mSurfacep);
 	llassert(x >= 0.f);
 	llassert(y >= 0.f);
diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp
index e085a945a8..a588d05ff7 100644
--- a/indra/newview/llvoavatar.cpp
+++ b/indra/newview/llvoavatar.cpp
@@ -3136,6 +3136,8 @@ void LLVOAvatar::idleUpdateWindEffect()
 
 void LLVOAvatar::idleUpdateNameTag(const LLVector3& root_pos_last)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
 	// update chat bubble
 	//--------------------------------------------------------------------
 	// draw text label over character's head
@@ -4884,6 +4886,8 @@ bool LLVOAvatar::shouldAlphaMask()
 //-----------------------------------------------------------------------------
 U32 LLVOAvatar::renderSkinned()
 {
+    LL_PROFILE_ZONE_SCOPED;
+
 	U32 num_indices = 0;
 
 	if (!mIsBuilt)
@@ -6158,27 +6162,29 @@ LLJoint *LLVOAvatar::getJoint( const std::string &name )
 LLJoint *LLVOAvatar::getJoint( S32 joint_num )
 {
     LLJoint *pJoint = NULL;
-    S32 collision_start = mNumBones;
-    S32 attachment_start = mNumBones + mNumCollisionVolumes;
-    if (joint_num>=attachment_start)
+    if (joint_num >= 0)
     {
-        // Attachment IDs start at 1
-        S32 attachment_id = joint_num - attachment_start + 1;
-        attachment_map_t::iterator iter = mAttachmentPoints.find(attachment_id);
-        if (iter != mAttachmentPoints.end())
+        if (joint_num < mNumBones)
         {
-            pJoint = iter->second;
+            pJoint = mSkeleton[joint_num];
+        }
+        else if (joint_num < mNumBones + mNumCollisionVolumes)
+        {
+            S32 collision_id = joint_num - mNumBones;
+            pJoint = &mCollisionVolumes[collision_id];
+        }
+        else
+        {
+            // Attachment IDs start at 1
+            S32 attachment_id = joint_num - (mNumBones + mNumCollisionVolumes) + 1;
+            attachment_map_t::iterator iter = mAttachmentPoints.find(attachment_id);
+            if (iter != mAttachmentPoints.end())
+            {
+                pJoint = iter->second;
+            }
         }
     }
-    else if (joint_num>=collision_start)
-    {
-        S32 collision_id = joint_num-collision_start;
-        pJoint = &mCollisionVolumes[collision_id];
-    }
-    else if (joint_num>=0)
-    {
-        pJoint = mSkeleton[joint_num];
-    }
+    
 	llassert(!pJoint || pJoint->getJointNum() == joint_num);
     return pJoint;
 }
@@ -6513,7 +6519,7 @@ void LLVOAvatar::addAttachmentOverridesForObject(LLViewerObject *vo, std::set<LL
 					LLJoint* pJoint = getJoint( lookingForJoint );
 					if (pJoint)
 					{   									
-						const LLVector3& jointPos = pSkinData->mAlternateBindMatrix[i].getTranslation();									
+						const LLVector3& jointPos = LLVector3(pSkinData->mAlternateBindMatrix[i].getTranslation());
                         if (pJoint->aboveJointPosThreshold(jointPos))
                         {
                             bool override_changed;
@@ -7117,6 +7123,7 @@ void LLVOAvatar::updateGL()
 {
 	if (mMeshTexturesDirty)
 	{
+		LL_PROFILE_ZONE_SCOPED
 		updateMeshTextures();
 		mMeshTexturesDirty = FALSE;
 	}
@@ -7862,6 +7869,8 @@ void LLVOAvatar::onGlobalColorChanged(const LLTexGlobalColor* global_color)
 // Do rigged mesh attachments display with this av?
 bool LLVOAvatar::shouldRenderRigged() const
 {
+    LL_PROFILE_ZONE_SCOPED;
+
 	if (getOverallAppearance() == AOA_NORMAL)
 	{
 		return true;
@@ -8353,6 +8362,7 @@ void LLVOAvatar::updateMeshVisibility()
 // virtual
 void LLVOAvatar::updateMeshTextures()
 {
+	LL_PROFILE_ZONE_SCOPED
 	static S32 update_counter = 0;
 	mBakedTextureDebugText.clear();
 	
@@ -10949,6 +10959,7 @@ void LLVOAvatar::updateOverallAppearanceAnimations()
 // Based on isVisuallyMuted(), but has 3 possible results.
 LLVOAvatar::AvatarOverallAppearance LLVOAvatar::getOverallAppearance() const
 {
+    LL_PROFILE_ZONE_SCOPED;
 	AvatarOverallAppearance result = AOA_NORMAL;
 
 	// Priority order (highest priority first)
diff --git a/indra/newview/llvoavatar.h b/indra/newview/llvoavatar.h
index 74ef589ca4..39adaab8ca 100644
--- a/indra/newview/llvoavatar.h
+++ b/indra/newview/llvoavatar.h
@@ -209,6 +209,11 @@ public:
 	virtual LLJoint*		getJoint(const std::string &name);
 	LLJoint*		        getJoint(S32 num);
 
+    // Fast path: indexes mSkeleton directly with no range check of its own, so use it only when joint_num is KNOWN to be a valid skeleton index (< getSkeletonJointCount()); otherwise call getJoint(S32).
+    inline LLJoint* getSkeletonJoint(S32 joint_num) { return mSkeleton[joint_num]; }
+    inline size_t getSkeletonJointCount() const { return mSkeleton.size(); }
+
+
 	void 					addAttachmentOverridesForObject(LLViewerObject *vo, std::set<LLUUID>* meshes_seen = NULL, bool recursive = true);
 	void					removeAttachmentOverridesForObject(const LLUUID& mesh_id);
 	void					removeAttachmentOverridesForObject(LLViewerObject *vo);
diff --git a/indra/newview/llvosurfacepatch.cpp b/indra/newview/llvosurfacepatch.cpp
index 897bace4e1..b5560d3d79 100644
--- a/indra/newview/llvosurfacepatch.cpp
+++ b/indra/newview/llvosurfacepatch.cpp
@@ -218,6 +218,7 @@ void LLVOSurfacePatch::updateGL()
 {
 	if (mPatchp)
 	{
+		LL_PROFILE_ZONE_SCOPED
 		mPatchp->updateGL();
 	}
 }
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index f063800587..e5a4b0f374 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -4807,7 +4807,7 @@ void LLRiggedVolume::update(const LLMeshSkinInfo* skin, LLVOAvatar* avatar, cons
 
 	LLMatrix4a mat[kMaxJoints];
 	U32 maxJoints = LLSkinningUtil::getMeshJointCount(skin);
-    LLSkinningUtil::initSkinningMatrixPalette((LLMatrix4*)mat, maxJoints, skin, avatar);
+    LLSkinningUtil::initSkinningMatrixPalette(mat, maxJoints, skin, avatar);
 
     S32 rigged_vert_count = 0;
     S32 rigged_face_count = 0;
@@ -4823,8 +4823,7 @@ void LLRiggedVolume::update(const LLMeshSkinInfo* skin, LLVOAvatar* avatar, cons
 		if ( weight )
 		{
             LLSkinningUtil::checkSkinWeights(weight, dst_face.mNumVertices, skin);
-			LLMatrix4a bind_shape_matrix;
-			bind_shape_matrix.loadu(skin->mBindShapeMatrix);
+			const LLMatrix4a& bind_shape_matrix = skin->mBindShapeMatrix;
 
 			LLVector4a* pos = dst_face.mPositions;
 
@@ -6045,123 +6044,130 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
 	if (group && group->hasState(LLSpatialGroup::MESH_DIRTY) && !group->hasState(LLSpatialGroup::GEOM_DIRTY))
 	{
 		LL_RECORD_BLOCK_TIME(FTM_REBUILD_VOLUME_VB);
-		LL_RECORD_BLOCK_TIME(FTM_REBUILD_VOLUME_GEN_DRAW_INFO); //make sure getgeometryvolume shows up in the right place in timers
+		{
+			// SL-15709 -- NOTE: Tracy only allows one ZoneScoped per scope.
+			// Solutions are:
+			// 1. Use a new scope (the approach taken here)
+			// 2. Use named zones
+			// 3. Use transient zones
+			LL_RECORD_BLOCK_TIME(FTM_REBUILD_VOLUME_GEN_DRAW_INFO); //make sure getgeometryvolume shows up in the right place in timers
 
-		group->mBuilt = 1.f;
+			group->mBuilt = 1.f;
 		
-		S32 num_mapped_vertex_buffer = LLVertexBuffer::sMappedCount ;
+			S32 num_mapped_vertex_buffer = LLVertexBuffer::sMappedCount ;
 
-		const U32 MAX_BUFFER_COUNT = 4096;
-		LLVertexBuffer* locked_buffer[MAX_BUFFER_COUNT];
-		
-		U32 buffer_count = 0;
+			const U32 MAX_BUFFER_COUNT = 4096;
+			LLVertexBuffer* locked_buffer[MAX_BUFFER_COUNT];
 
-		for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter)
-		{
-			LLDrawable* drawablep = (LLDrawable*)(*drawable_iter)->getDrawable();
+			U32 buffer_count = 0;
 
-			if (drawablep && !drawablep->isDead() && drawablep->isState(LLDrawable::REBUILD_ALL) && !drawablep->isState(LLDrawable::RIGGED) )
+			for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter)
 			{
-				LLVOVolume* vobj = drawablep->getVOVolume();
-                if (debugLoggingEnabled("AnimatedObjectsLinkset"))
-                {
-                    if (vobj->isAnimatedObject() && vobj->isRiggedMesh())
-                    {
-                        std::string vobj_name = llformat("Vol%p", vobj);
-                        F32 est_tris = vobj->getEstTrianglesMax();
-                        LL_DEBUGS("AnimatedObjectsLinkset") << vobj_name << " rebuildMesh, tris " << est_tris << LL_ENDL; 
-                    }
-                }
-				if (vobj->isNoLOD()) continue;
-
-				vobj->preRebuild();
-
-				if (drawablep->isState(LLDrawable::ANIMATED_CHILD))
-				{
-					vobj->updateRelativeXform(true);
-				}
+				LLDrawable* drawablep = (LLDrawable*)(*drawable_iter)->getDrawable();
 
-				LLVolume* volume = vobj->getVolume();
-				for (S32 i = 0; i < drawablep->getNumFaces(); ++i)
+				if (drawablep && !drawablep->isDead() && drawablep->isState(LLDrawable::REBUILD_ALL) && !drawablep->isState(LLDrawable::RIGGED) )
 				{
-					LLFace* face = drawablep->getFace(i);
-					if (face)
+					LLVOVolume* vobj = drawablep->getVOVolume();
+					if (debugLoggingEnabled("AnimatedObjectsLinkset"))
 					{
-						LLVertexBuffer* buff = face->getVertexBuffer();
-						if (buff)
+						if (vobj->isAnimatedObject() && vobj->isRiggedMesh())
 						{
-							llassert(!face->isState(LLFace::RIGGED));
+							std::string vobj_name = llformat("Vol%p", vobj);
+							F32 est_tris = vobj->getEstTrianglesMax();
+							LL_DEBUGS("AnimatedObjectsLinkset") << vobj_name << " rebuildMesh, tris " << est_tris << LL_ENDL;
+						}
+					}
+					if (vobj->isNoLOD()) continue;
 
-							if (!face->getGeometryVolume(*volume, face->getTEOffset(), 
-								vobj->getRelativeXform(), vobj->getRelativeXformInvTrans(), face->getGeomIndex()))
-							{ //something's gone wrong with the vertex buffer accounting, rebuild this group 
-								group->dirtyGeom();
-								gPipeline.markRebuild(group, TRUE);
-							}
+					vobj->preRebuild();
 
+					if (drawablep->isState(LLDrawable::ANIMATED_CHILD))
+					{
+						vobj->updateRelativeXform(true);
+					}
 
-							if (buff->isLocked() && buffer_count < MAX_BUFFER_COUNT)
+					LLVolume* volume = vobj->getVolume();
+					for (S32 i = 0; i < drawablep->getNumFaces(); ++i)
+					{
+						LLFace* face = drawablep->getFace(i);
+						if (face)
+						{
+							LLVertexBuffer* buff = face->getVertexBuffer();
+							if (buff)
 							{
-								locked_buffer[buffer_count++] = buff;
+								llassert(!face->isState(LLFace::RIGGED));
+
+								if (!face->getGeometryVolume(*volume, face->getTEOffset(), 
+									vobj->getRelativeXform(), vobj->getRelativeXformInvTrans(), face->getGeomIndex()))
+								{ //something's gone wrong with the vertex buffer accounting, rebuild this group 
+									group->dirtyGeom();
+									gPipeline.markRebuild(group, TRUE);
+								}
+
+
+								if (buff->isLocked() && buffer_count < MAX_BUFFER_COUNT)
+								{
+									locked_buffer[buffer_count++] = buff;
+								}
 							}
 						}
 					}
+
+					if (drawablep->isState(LLDrawable::ANIMATED_CHILD))
+					{
+						vobj->updateRelativeXform();
+					}
+
+					drawablep->clearState(LLDrawable::REBUILD_ALL);
 				}
+			}
 
-				if (drawablep->isState(LLDrawable::ANIMATED_CHILD))
+			{
+				LL_RECORD_BLOCK_TIME(FTM_REBUILD_MESH_FLUSH);
+				for (LLVertexBuffer** iter = locked_buffer, ** end_iter = locked_buffer+buffer_count; iter != end_iter; ++iter)
 				{
-					vobj->updateRelativeXform();
+					(*iter)->flush();
 				}
 
-				
-				drawablep->clearState(LLDrawable::REBUILD_ALL);
+				// don't forget alpha
+				if(group != NULL &&
+				   !group->mVertexBuffer.isNull() &&
+				   group->mVertexBuffer->isLocked())
+				{
+					group->mVertexBuffer->flush();
+				}
 			}
-		}
-		
-		{
-			LL_RECORD_BLOCK_TIME(FTM_REBUILD_MESH_FLUSH);
-			for (LLVertexBuffer** iter = locked_buffer, ** end_iter = locked_buffer+buffer_count; iter != end_iter; ++iter)
-		{
-			(*iter)->flush();
-		}
-
-		// don't forget alpha
-		if(group != NULL && 
-		   !group->mVertexBuffer.isNull() && 
-		   group->mVertexBuffer->isLocked())
-		{
-			group->mVertexBuffer->flush();
-		}
-		}
 
-		//if not all buffers are unmapped
-		if(num_mapped_vertex_buffer != LLVertexBuffer::sMappedCount) 
-		{
-			LL_WARNS() << "Not all mapped vertex buffers are unmapped!" << LL_ENDL ; 
-			for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter)
+			//if not all buffers are unmapped
+			if(num_mapped_vertex_buffer != LLVertexBuffer::sMappedCount)
 			{
-				LLDrawable* drawablep = (LLDrawable*)(*drawable_iter)->getDrawable();
-				if(!drawablep)
-				{
-					continue;
-				}
-				for (S32 i = 0; i < drawablep->getNumFaces(); ++i)
+				LL_WARNS() << "Not all mapped vertex buffers are unmapped!" << LL_ENDL ;
+				for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter)
 				{
-					LLFace* face = drawablep->getFace(i);
-					if (face)
+					LLDrawable* drawablep = (LLDrawable*)(*drawable_iter)->getDrawable();
+					if(!drawablep)
+					{
+						continue;
+					}
+					for (S32 i = 0; i < drawablep->getNumFaces(); ++i)
 					{
-						LLVertexBuffer* buff = face->getVertexBuffer();
-						if (buff && buff->isLocked())
+						LLFace* face = drawablep->getFace(i);
+						if (face)
 						{
-							buff->flush();
+							LLVertexBuffer* buff = face->getVertexBuffer();
+							if (buff && buff->isLocked())
+							{
+								buff->flush();
+							}
 						}
 					}
 				}
-			} 
+			}
+
+			group->clearState(LLSpatialGroup::MESH_DIRTY | LLSpatialGroup::NEW_DRAWINFO);
 		}
 
-		group->clearState(LLSpatialGroup::MESH_DIRTY | LLSpatialGroup::NEW_DRAWINFO);
-	}
+	} // end if (group && MESH_DIRTY && !GEOM_DIRTY) -- the extra Tracy scope closes above
 
 //	llassert(!group || !group->isState(LLSpatialGroup::NEW_DRAWINFO));
 }
diff --git a/indra/newview/llworld.cpp b/indra/newview/llworld.cpp
index a1a1db35d6..c7b0a2bfb4 100644
--- a/indra/newview/llworld.cpp
+++ b/indra/newview/llworld.cpp
@@ -1284,6 +1284,7 @@ void send_agent_pause()
 
 void send_agent_resume()
 {
+	LL_PROFILE_ZONE_SCOPED
 	// Note: used to check for LLWorld initialization before it became a singleton.
 	// Rather than just remove this check I'm changing it to assure that the message 
 	// system has been initialized. -MG
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index cd1b9c7c69..c0b469af81 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -338,7 +338,6 @@ S32		LLPipeline::sUseOcclusion = 0;
 bool	LLPipeline::sDelayVBUpdate = true;
 bool	LLPipeline::sAutoMaskAlphaDeferred = true;
 bool	LLPipeline::sAutoMaskAlphaNonDeferred = false;
-bool	LLPipeline::sDisableShaders = false;
 bool	LLPipeline::sRenderTransparentWater = true;
 bool	LLPipeline::sRenderBump = true;
 bool	LLPipeline::sBakeSunlight = false;
@@ -1393,10 +1392,7 @@ void LLPipeline::restoreGL()
 
 bool LLPipeline::canUseVertexShaders()
 {
-	if (sDisableShaders ||
-		!gGLManager.mHasVertexShader ||
-		!gGLManager.mHasFragmentShader ||
-		(assertInitialized() && mVertexShadersLoaded != 1) )
+	if ((assertInitialized() && mVertexShadersLoaded != 1) )
 	{
 		return false;
 	}
@@ -1408,8 +1404,7 @@ bool LLPipeline::canUseVertexShaders()
 
 bool LLPipeline::canUseWindLightShaders() const
 {
-	return (!LLPipeline::sDisableShaders &&
-			gWLSkyProgram.mProgramObject != 0 &&
+	return (gWLSkyProgram.mProgramObject != 0 &&
 			LLViewerShaderMgr::instance()->getShaderLevel(LLViewerShaderMgr::SHADER_WINDLIGHT) > 1);
 }
 
@@ -1897,6 +1892,7 @@ void LLPipeline::createObject(LLViewerObject* vobj)
 
 void LLPipeline::resetFrameStats()
 {
+	LL_PROFILE_ZONE_SCOPED
 	assertInitialized();
 
 	sCompiles        = 0;
@@ -2125,6 +2121,7 @@ void LLPipeline::grabReferences(LLCullResult& result)
 
 void LLPipeline::clearReferences()
 {
+	LL_PROFILE_ZONE_SCOPED
 	sCull = NULL;
 	mGroupSaveQ1.clear();
 }
@@ -4564,92 +4561,99 @@ void LLPipeline::renderGeomDeferred(LLCamera& camera)
 	LLAppViewer::instance()->pingMainloopTimeout("Pipeline:RenderGeomDeferred");
 
 	LL_RECORD_BLOCK_TIME(FTM_RENDER_GEOMETRY);
+	{
+		// SL-15709 -- NOTE: Tracy only allows one ZoneScoped per scope.
+		// Solutions are:
+		// 1. Use a new scope (the approach taken here)
+		// 2. Use named zones
+		// 3. Use transient zones
+		LL_RECORD_BLOCK_TIME(FTM_DEFERRED_POOLS);
 
-	LL_RECORD_BLOCK_TIME(FTM_DEFERRED_POOLS);
-
-	LLGLEnable cull(GL_CULL_FACE);
+		LLGLEnable cull(GL_CULL_FACE);
 
-	for (pool_set_t::iterator iter = mPools.begin(); iter != mPools.end(); ++iter)
-	{
-		LLDrawPool *poolp = *iter;
-		if (hasRenderType(poolp->getType()))
+		for (pool_set_t::iterator iter = mPools.begin(); iter != mPools.end(); ++iter)
 		{
-			poolp->prerender();
+			LLDrawPool *poolp = *iter;
+			if (hasRenderType(poolp->getType()))
+			{
+				poolp->prerender();
+			}
 		}
-	}
 
-	LLGLEnable multisample(RenderFSAASamples > 0 ? GL_MULTISAMPLE_ARB : 0);
+		LLGLEnable multisample(RenderFSAASamples > 0 ? GL_MULTISAMPLE_ARB : 0);
 
-	LLVertexBuffer::unbind();
+		LLVertexBuffer::unbind();
 
-	LLGLState::checkStates();
-	LLGLState::checkTextureChannels();
-	LLGLState::checkClientArrays();
+		LLGLState::checkStates();
+		LLGLState::checkTextureChannels();
+		LLGLState::checkClientArrays();
 
-	U32 cur_type = 0;
+		U32 cur_type = 0;
 
-	gGL.setColorMask(true, true);
+		gGL.setColorMask(true, true);
 	
-	pool_set_t::iterator iter1 = mPools.begin();
+		pool_set_t::iterator iter1 = mPools.begin();
 
-	while ( iter1 != mPools.end() )
-	{
-		LLDrawPool *poolp = *iter1;
+		while ( iter1 != mPools.end() )
+		{
+			LLDrawPool *poolp = *iter1;
 		
-		cur_type = poolp->getType();
+			cur_type = poolp->getType();
 
-		pool_set_t::iterator iter2 = iter1;
-		if (hasRenderType(poolp->getType()) && poolp->getNumDeferredPasses() > 0)
-		{
-			LL_RECORD_BLOCK_TIME(FTM_DEFERRED_POOLRENDER);
+			pool_set_t::iterator iter2 = iter1;
+			if (hasRenderType(poolp->getType()) && poolp->getNumDeferredPasses() > 0)
+			{
+				LL_RECORD_BLOCK_TIME(FTM_DEFERRED_POOLRENDER);
 
-			gGLLastMatrix = NULL;
-			gGL.loadMatrix(gGLModelView);
+				gGLLastMatrix = NULL;
+				gGL.loadMatrix(gGLModelView);
 		
-			for( S32 i = 0; i < poolp->getNumDeferredPasses(); i++ )
-			{
-				LLVertexBuffer::unbind();
-				poolp->beginDeferredPass(i);
-				for (iter2 = iter1; iter2 != mPools.end(); iter2++)
+				for( S32 i = 0; i < poolp->getNumDeferredPasses(); i++ )
 				{
-					LLDrawPool *p = *iter2;
-					if (p->getType() != cur_type)
+					LLVertexBuffer::unbind();
+					poolp->beginDeferredPass(i);
+					for (iter2 = iter1; iter2 != mPools.end(); iter2++)
 					{
-						break;
+						LLDrawPool *p = *iter2;
+						if (p->getType() != cur_type)
+						{
+							break;
+						}
+
+						if ( !p->getSkipRenderFlag() ) { p->renderDeferred(i); }
 					}
-										
-					if ( !p->getSkipRenderFlag() ) { p->renderDeferred(i); }
-				}
-				poolp->endDeferredPass(i);
-				LLVertexBuffer::unbind();
+					poolp->endDeferredPass(i);
+					LLVertexBuffer::unbind();
 
-				if (gDebugGL || gDebugPipeline)
-				{
-					LLGLState::checkStates();
+					if (gDebugGL || gDebugPipeline)
+					{
+						LLGLState::checkStates();
+					}
 				}
 			}
-		}
-		else
-		{
-			// Skip all pools of this type
-			for (iter2 = iter1; iter2 != mPools.end(); iter2++)
+			else
 			{
-				LLDrawPool *p = *iter2;
-				if (p->getType() != cur_type)
+				// Skip all pools of this type
+				for (iter2 = iter1; iter2 != mPools.end(); iter2++)
 				{
-					break;
+					LLDrawPool *p = *iter2;
+					if (p->getType() != cur_type)
+					{
+						break;
+					}
 				}
 			}
+			iter1 = iter2;
+			stop_glerror();
 		}
-		iter1 = iter2;
-		stop_glerror();
-	}
 
-	gGLLastMatrix = NULL;
-    gGL.matrixMode(LLRender::MM_MODELVIEW);
-	gGL.loadMatrix(gGLModelView);
+		gGLLastMatrix = NULL;
+		gGL.matrixMode(LLRender::MM_MODELVIEW);
+		gGL.loadMatrix(gGLModelView);
 
-	gGL.setColorMask(true, false);
+		gGL.setColorMask(true, false);
+
+	} // Tracy ZoneScoped
 }
 
 void LLPipeline::renderGeomPostDeferred(LLCamera& camera, bool do_occlusion)
@@ -11139,6 +11143,7 @@ void LLPipeline::generateImpostor(LLVOAvatar* avatar)
 		if (LLPipeline::sRenderDeferred)
 		{
 			GLuint buff = GL_COLOR_ATTACHMENT0;
+			LL_PROFILER_GPU_ZONEC( "gl.DrawBuffersARB", 0x8000FF );
 			glDrawBuffersARB(1, &buff);
 		}
 
diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h
index 0eaa6b141d..5605d26410 100644
--- a/indra/newview/pipeline.h
+++ b/indra/newview/pipeline.h
@@ -574,7 +574,6 @@ public:
 	static bool				sDelayVBUpdate;
 	static bool				sAutoMaskAlphaDeferred;
 	static bool				sAutoMaskAlphaNonDeferred;
-	static bool				sDisableShaders; // if true, rendering will be done without shaders
 	static bool				sRenderTransparentWater;
 	static bool				sRenderBump;
 	static bool				sBakeSunlight;
diff --git a/indra/test/test.cpp b/indra/test/test.cpp
index 87c4a8d8a3..bb48216b2b 100644
--- a/indra/test/test.cpp
+++ b/indra/test/test.cpp
@@ -112,6 +112,7 @@ public:
 
 	virtual void recordMessage(LLError::ELevel level, const std::string& message)
 	{
+        LL_PROFILE_ZONE_SCOPED
 		mFile << message << std::endl;
 	}