18 files changed, 681 insertions, 175 deletions
diff --git a/indra/llcommon/llassettype.cpp b/indra/llcommon/llassettype.cpp
index eb610f625a..145dddd543 100644
--- a/indra/llcommon/llassettype.cpp
+++ b/indra/llcommon/llassettype.cpp
@@ -93,8 +93,9 @@ LLAssetDictionary::LLAssetDictionary()
 
 	addEntry(LLAssetType::AT_LINK, 				new AssetEntry("LINK",				"link",		"sym link",			false,		false,		true));
 	addEntry(LLAssetType::AT_LINK_FOLDER, 		new AssetEntry("FOLDER_LINK",		"link_f", 	"sym folder link",	false,		false,		true));
+	addEntry(LLAssetType::AT_MESH,              new AssetEntry("MESH",              "mesh",     "mesh",             false, false, false));
+	addEntry(LLAssetType::AT_NONE, 				new AssetEntry("NONE",				"-1",		NULL,		  		FALSE,		FALSE,		FALSE));
 
-	addEntry(LLAssetType::AT_NONE, 				new AssetEntry("NONE",				"-1",		NULL,		  		false,		false,		false));
 };
 
 // static
diff --git a/indra/llcommon/llassettype.h b/indra/llcommon/llassettype.h
index c5ff2364cc..74ccd00324 100644
--- a/indra/llcommon/llassettype.h
+++ b/indra/llcommon/llassettype.h
@@ -108,8 +108,10 @@ public:
 
 		AT_LINK_FOLDER = 25,
 			// Inventory folder link
-		
-		AT_COUNT = 26,
+		AT_MESH = 49,
+		    // Mesh data in our proprietary SLM format
+
+		AT_COUNT = 50,
 
 			// +*********************************************************+
 			// |  TO ADD AN ELEMENT TO THIS ENUM:                        |
diff --git a/indra/llcommon/lldefs.h b/indra/llcommon/lldefs.h
index 6b38de6500..5a4b8325f4 100644
--- a/indra/llcommon/lldefs.h
+++ b/indra/llcommon/lldefs.h
@@ -236,5 +236,13 @@ inline LLDATATYPE llclampb(const LLDATATYPE& a)
 	return llmin(llmax(a, (LLDATATYPE)0), (LLDATATYPE)255);
 }
 
+template <class LLDATATYPE> 
+inline void llswap(LLDATATYPE& lhs, LLDATATYPE& rhs)
+{
+	LLDATATYPE tmp = lhs;
+	lhs = rhs;
+	rhs = tmp;
+}
+
 #endif // LL_LLDEFS_H
 
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index 4ff93a553c..2b25f2fabb 100644..100755
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -27,140 +27,9 @@
 #ifndef LL_FASTTIMER_H
 #define LL_FASTTIMER_H
 
+// Implementation of getCPUClockCount32() and getCPUClockCount64 are now in llfastertimer_class.cpp.
+
 // pull in the actual class definition
 #include "llfasttimer_class.h"
 
-//
-// Important note: These implementations must be FAST!
-//
-
-#if LL_WINDOWS
-//
-// Windows implementation of CPU clock
-//
-
-//
-// NOTE: put back in when we aren't using platform sdk anymore
-//
-// because MS has different signatures for these functions in winnt.h
-// need to rename them to avoid conflicts
-//#define _interlockedbittestandset _renamed_interlockedbittestandset
-//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
-//#include <intrin.h>
-//#undef _interlockedbittestandset
-//#undef _interlockedbittestandreset
-
-//inline U32 LLFastTimer::getCPUClockCount32()
-//{
-//	U64 time_stamp = __rdtsc();
-//	return (U32)(time_stamp >> 8);
-//}
-//
-//// return full timer value, *not* shifted by 8 bits
-//inline U64 LLFastTimer::getCPUClockCount64()
-//{
-//	return __rdtsc();
-//}
-
-// shift off lower 8 bits for lower resolution but longer term timing
-// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	U32 ret_val;
-	__asm
-	{
-        _emit   0x0f
-        _emit   0x31
-		shr eax,8
-		shl edx,24
-		or eax, edx
-		mov dword ptr [ret_val], eax
-	}
-    return ret_val;
-}
-
-// return full timer value, *not* shifted by 8 bits
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	U64 ret_val;
-	__asm
-	{
-        _emit   0x0f
-        _emit   0x31
-		mov eax,eax
-		mov edx,edx
-		mov dword ptr [ret_val+4], edx
-		mov dword ptr [ret_val], eax
-	}
-    return ret_val;
-}
-#endif
-
-
-#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
-//
-// Linux and Solaris implementation of CPU clock - non-x86.
-// This is accurate but SLOW!  Only use out of desperation.
-//
-// Try to use the MONOTONIC clock if available, this is a constant time counter
-// with nanosecond resolution (but not necessarily accuracy) and attempts are
-// made to synchronize this value between cores at kernel start. It should not
-// be affected by CPU frequency. If not available use the REALTIME clock, but
-// this may be affected by NTP adjustments or other user activity affecting
-// the system time.
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	struct timespec tp;
-	
-#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
-	if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
-#endif
-		clock_gettime(CLOCK_REALTIME,&tp);
-
-	return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;        
-}
-
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	return (U32)(LLFastTimer::getCPUClockCount64() >> 8);
-}
-#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
-
-
-#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
-//
-// Mac+Linux+Solaris FAST x86 implementation of CPU clock
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	U64 x;
-	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
-	return (U32)(x >> 8);
-}
-
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	U64 x;
-	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
-	return x;
-}
-#endif
-
-
-#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__)))
-//
-// Mac PPC (deprecated) implementation of CPU clock
-//
-// Just use gettimeofday implementation for now
-
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	return (U32)(get_clock_count()>>8);
-}
-
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	return get_clock_count();
-}
-#endif
-
 #endif // LL_LLFASTTIMER_H
diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp
index bce87ada96..bd594b06cf 100644
--- a/indra/llcommon/llfasttimer_class.cpp
+++ b/indra/llcommon/llfasttimer_class.cpp
@@ -35,10 +35,13 @@
 
 #include <boost/bind.hpp>
 
+
 #if LL_WINDOWS
+#include "lltimer.h"
 #elif LL_LINUX || LL_SOLARIS
 #include <sys/time.h>
 #include <sched.h>
+#include "lltimer.h"
 #elif LL_DARWIN
 #include <sys/time.h>
 #include "lltimer.h"	// get_clock_count()
@@ -61,6 +64,8 @@ BOOL LLFastTimer::sMetricLog = FALSE;
 LLMutex* LLFastTimer::sLogLock = NULL;
 std::queue<LLSD> LLFastTimer::sLogQueue;
 
+#define USE_RDTSC 0
+
 #if LL_LINUX || LL_SOLARIS
 U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution
 #else
@@ -234,10 +239,23 @@ U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
 #else // windows or x86-mac or x86-linux or x86-solaris
 U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
 {
+#if USE_RDTSC || !LL_WINDOWS
 	//getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz
 	static U64 sCPUClockFrequency = U64(LLProcessorInfo().getCPUFrequency()*1000000.0);
 
 	// we drop the low-order byte in our timers, so report a lower frequency
+#else
+	// If we're not using RDTSC, each fasttimer tick is just a performance counter tick.
+	// Not redefining the clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency())
+	// since that would change displayed MHz stats for CPUs
+	static bool firstcall = true;
+	static U64 sCPUClockFrequency;
+	if (firstcall)
+	{
+		QueryPerformanceFrequency((LARGE_INTEGER*)&sCPUClockFrequency);
+		firstcall = false;
+	}
+#endif
 	return sCPUClockFrequency >> 8;
 }
 #endif
@@ -482,6 +500,19 @@ void LLFastTimer::NamedTimer::resetFrame()
 {
 	if (sLog)
 	{ //output current frame counts to performance log
+
+		static S32 call_count = 0;
+		if (call_count % 100 == 0)
+		{
+			llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl;
+			llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl;
+			llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl;
+			llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl;
+			llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl;
+			llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl;
+		}
+		call_count++;
+
 		F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency
 
 		F64 total_time = 0;
@@ -763,3 +794,144 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state)
 
 
 //////////////////////////////////////////////////////////////////////////////
+//
+// Important note: These implementations must be FAST!
+//
+
+
+#if LL_WINDOWS
+//
+// Windows implementation of CPU clock
+//
+
+//
+// NOTE: put back in when we aren't using platform sdk anymore
+//
+// because MS has different signatures for these functions in winnt.h
+// need to rename them to avoid conflicts
+//#define _interlockedbittestandset _renamed_interlockedbittestandset
+//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
+//#include <intrin.h>
+//#undef _interlockedbittestandset
+//#undef _interlockedbittestandreset
+
+//inline U32 LLFastTimer::getCPUClockCount32()
+//{
+//	U64 time_stamp = __rdtsc();
+//	return (U32)(time_stamp >> 8);
+//}
+//
+//// return full timer value, *not* shifted by 8 bits
+//inline U64 LLFastTimer::getCPUClockCount64()
+//{
+//	return __rdtsc();
+//}
+
+// shift off lower 8 bits for lower resolution but longer term timing
+// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
+#if USE_RDTSC
+U32 LLFastTimer::getCPUClockCount32()
+{
+	U32 ret_val;
+	__asm
+	{
+        _emit   0x0f
+        _emit   0x31
+		shr eax,8
+		shl edx,24
+		or eax, edx
+		mov dword ptr [ret_val], eax
+	}
+    return ret_val;
+}
+
+// return full timer value, *not* shifted by 8 bits
+U64 LLFastTimer::getCPUClockCount64()
+{
+	U64 ret_val;
+	__asm
+	{
+        _emit   0x0f
+        _emit   0x31
+		mov eax,eax
+		mov edx,edx
+		mov dword ptr [ret_val+4], edx
+		mov dword ptr [ret_val], eax
+	}
+    return ret_val;
+}
+
+std::string LLFastTimer::sClockType = "rdtsc";
+
+#else
+//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
+// These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures.
+U32 LLFastTimer::getCPUClockCount32()
+{
+	return (U32)(get_clock_count()>>8);
+}
+
+U64 LLFastTimer::getCPUClockCount64()
+{
+	return get_clock_count();
+}
+
+std::string LLFastTimer::sClockType = "QueryPerformanceCounter";
+#endif
+
+#endif
+
+
+#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+//
+// Linux and Solaris implementation of CPU clock - non-x86.
+// This is accurate but SLOW!  Only use out of desperation.
+//
+// Try to use the MONOTONIC clock if available, this is a constant time counter
+// with nanosecond resolution (but not necessarily accuracy) and attempts are
+// made to synchronize this value between cores at kernel start. It should not
+// be affected by CPU frequency. If not available use the REALTIME clock, but
+// this may be affected by NTP adjustments or other user activity affecting
+// the system time.
+U64 LLFastTimer::getCPUClockCount64()
+{
+	struct timespec tp;
+	
+#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
+	if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
+#endif
+		clock_gettime(CLOCK_REALTIME,&tp);
+
+	return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;        
+}
+
+U32 LLFastTimer::getCPUClockCount32()
+{
+	return (U32)(LLFastTimer::getCPUClockCount64() >> 8);
+}
+
+std::string LLFastTimer::sClockType = "clock_gettime";
+
+#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+
+
+#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
+//
+// Mac+Linux+Solaris FAST x86 implementation of CPU clock
+U32 LLFastTimer::getCPUClockCount32()
+{
+	U64 x;
+	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
+	return (U32)(x >> 8);
+}
+
+U64 LLFastTimer::getCPUClockCount64()
+{
+	U64 x;
+	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
+	return x;
+}
+
+std::string LLFastTimer::sClockType = "rdtsc";
+#endif
+
diff --git a/indra/llcommon/llfasttimer_class.h b/indra/llcommon/llfasttimer_class.h
index eb9789682b..827747f0c6 100644
--- a/indra/llcommon/llfasttimer_class.h
+++ b/indra/llcommon/llfasttimer_class.h
@@ -31,12 +31,15 @@
 
 #define FAST_TIMER_ON 1
 #define TIME_FAST_TIMERS 0
+#define DEBUG_FAST_TIMER_THREADS 1
 
 class LLMutex;
 
 #include <queue>
 #include "llsd.h"
 
+LL_COMMON_API void assert_main_thread();
+
 class LL_COMMON_API LLFastTimer
 {
 public:
@@ -176,6 +179,11 @@ public:
 		U64 timer_end = getCPUClockCount64();
 		sTimerCycles += timer_end - timer_start;
 #endif
+#if DEBUG_FAST_TIMER_THREADS
+#if !LL_RELEASE
+		assert_main_thread();
+#endif
+#endif
 	}
 
 	LL_FORCE_INLINE ~LLFastTimer()
@@ -245,6 +253,7 @@ public:
 		U32				mChildTime;
 	};
 	static CurTimerData		sCurTimerData;
+	static std::string sClockType;
 
 private:
 	static U32 getCPUClockCount32();
diff --git a/indra/llcommon/llfoldertype.cpp b/indra/llcommon/llfoldertype.cpp
index ebc79af412..c2cfb7286e 100644
--- a/indra/llcommon/llfoldertype.cpp
+++ b/indra/llcommon/llfoldertype.cpp
@@ -89,6 +89,9 @@ LLFolderDictionary::LLFolderDictionary()
 	addEntry(LLFolderType::FT_CURRENT_OUTFIT, 		new FolderEntry("current",	TRUE));
 	addEntry(LLFolderType::FT_OUTFIT, 				new FolderEntry("outfit",	FALSE));
 	addEntry(LLFolderType::FT_MY_OUTFITS, 			new FolderEntry("my_otfts",	TRUE));
+
+	addEntry(LLFolderType::FT_MESH, 				new FolderEntry("mesh",	TRUE));
+
 	addEntry(LLFolderType::FT_INBOX, 				new FolderEntry("inbox",	TRUE));
 		 
 	addEntry(LLFolderType::FT_NONE, 				new FolderEntry("-1",		FALSE));
diff --git a/indra/llcommon/llfoldertype.h b/indra/llcommon/llfoldertype.h
index 936fbed17d..cb32cb075b 100644
--- a/indra/llcommon/llfoldertype.h
+++ b/indra/llcommon/llfoldertype.h
@@ -80,9 +80,11 @@ public:
 		FT_OUTFIT = 47,
 		FT_MY_OUTFITS = 48,
 		
-		FT_INBOX = 49,
+		FT_MESH = 49,
 
-		FT_COUNT = 50,
+		FT_INBOX = 50,
+
+		FT_COUNT = 51,
 
 		FT_NONE = -1
 	};
diff --git a/indra/llcommon/llmd5.h b/indra/llcommon/llmd5.h
index c8acbbe591..1526e6ac3c 100644
--- a/indra/llcommon/llmd5.h
+++ b/indra/llcommon/llmd5.h
@@ -103,7 +103,7 @@ public:
   void				raw_digest(unsigned char *array) const;	// provide 16-byte array for binary data
   void				hex_digest(char *string) const;			// provide 33-byte array for ascii-hex string
 
-  friend std::ostream&   operator<< (std::ostream&, LLMD5 context);
+  friend LL_COMMON_API std::ostream&   operator<< (std::ostream&, LLMD5 context);
 
 private:
 
diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp
index a502d1a7eb..06ad7736d3 100644
--- a/indra/llcommon/llmemory.cpp
+++ b/indra/llcommon/llmemory.cpp
@@ -67,25 +67,6 @@ void LLMemory::freeReserve()
 	reserveMem = NULL;
 }
 
-void* ll_allocate (size_t size)
-{
-	if (size == 0)
-	{
-		llwarns << "Null allocation" << llendl;
-	}
-	void *p = malloc(size);
-	if (p == NULL)
-	{
-		LLMemory::freeReserve();
-		llerrs << "Out of memory Error" << llendl;
-	}
-	return p;
-}
-
-void ll_release (void *p)
-{
-	free(p);
-}
 
 //----------------------------------------------------------------------------
 
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index 9bf4248bb7..8d114f744b 100644
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -1,39 +1,102 @@
-/** 
+/**
  * @file llmemory.h
  * @brief Memory allocation/deallocation header-stuff goes here.
  *
  * $LicenseInfo:firstyear=2002&license=viewerlgpl$
  * Second Life Viewer Source Code
  * Copyright (C) 2010, Linden Research, Inc.
- * 
+ *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation;
  * version 2.1 of the License only.
- * 
+ *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- * 
+ *
  * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
  * $/LicenseInfo$
  */
 #ifndef LLMEMORY_H
 #define LLMEMORY_H
 
+#include <stdlib.h>
+
+// A not necessarily efficient, but general, aligned malloc http://stackoverflow.com/questions/196329/osx-lacks-memalign
+#if 0  //DON'T use ll_aligned_foo now that we use tcmalloc everywhere (tcmalloc aligns automatically at appropriate intervals)
+inline void* ll_aligned_malloc( size_t size, int align )
+{
+	void* mem = malloc( size + (align - 1) + sizeof(void*) );
+	char* aligned = ((char*)mem) + sizeof(void*);
+	aligned += align - ((uintptr_t)aligned & (align - 1));
+
+	((void**)aligned)[-1] = mem;
+	return aligned;
+}
 
+inline void ll_aligned_free( void* ptr )
+{
+	free( ((void**)ptr)[-1] );
+}
 
-extern S32 gTotalDAlloc;
-extern S32 gTotalDAUse;
-extern S32 gDACount;
+inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
+{
+#if defined(LL_WINDOWS)
+	return _mm_malloc(size, 16);
+#elif defined(LL_DARWIN)
+	return malloc(size); // default osx malloc is 16 byte aligned.
+#else
+	void *rtn;
+	if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size)))
+		return rtn;
+	else // bad alignment requested, or out of memory
+		return NULL;
+#endif
+}
 
-extern void* ll_allocate (size_t size);
-extern void ll_release (void *p);
+inline void ll_aligned_free_16(void *p)
+{
+#if defined(LL_WINDOWS)
+	_mm_free(p);
+#elif defined(LL_DARWIN)
+	return free(p);
+#else
+	free(p); // posix_memalign() is compatible with heap deallocator
+#endif
+}
+
+inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32().
+{
+#if defined(LL_WINDOWS)
+	return _mm_malloc(size, 32);
+#elif defined(LL_DARWIN)
+	return ll_aligned_malloc( size, 32 );
+#else
+	void *rtn;
+	if (LL_LIKELY(0 == posix_memalign(&rtn, 32, size)))
+		return rtn;
+	else // bad alignment requested, or out of memory
+		return NULL;
+#endif
+}
+
+inline void ll_aligned_free_32(void *p)
+{
+#if defined(LL_WINDOWS)
+	_mm_free(p);
+#elif defined(LL_DARWIN)
+	ll_aligned_free( p );
+#else
+	free(p); // posix_memalign() is compatible with heap deallocator
+#endif
+}
+#endif
 
 class LL_COMMON_API LLMemory
 {
diff --git a/indra/llcommon/llrefcount.cpp b/indra/llcommon/llrefcount.cpp
index 55d0c85cbd..e1876599fc 100644
--- a/indra/llcommon/llrefcount.cpp
+++ b/indra/llcommon/llrefcount.cpp
@@ -29,9 +29,25 @@
 
 #include "llerror.h"
 
+#if LL_REF_COUNT_DEBUG
+#include "llthread.h"
+#include "llapr.h"
+#endif
+
 LLRefCount::LLRefCount(const LLRefCount& other)
 :	mRef(0)
 {
+#if LL_REF_COUNT_DEBUG
+	if(gAPRPoolp)
+	{
+		mMutexp = new LLMutex(gAPRPoolp) ;
+	}
+	else
+	{
+		mMutexp = NULL ;
+	}
+	mCrashAtUnlock = FALSE ;
+#endif
 }
 
 LLRefCount& LLRefCount::operator=(const LLRefCount&)
@@ -43,6 +59,17 @@ LLRefCount& LLRefCount::operator=(const LLRefCount&)
 LLRefCount::LLRefCount() :
 	mRef(0)
 {
+#if LL_REF_COUNT_DEBUG
+	if(gAPRPoolp)
+	{
+		mMutexp = new LLMutex(gAPRPoolp) ;
+	}
+	else
+	{
+		mMutexp = NULL ;
+	}
+	mCrashAtUnlock = FALSE ;
+#endif
 }
 
 LLRefCount::~LLRefCount()
@@ -51,4 +78,87 @@ LLRefCount::~LLRefCount()
 	{
 		llerrs << "deleting non-zero reference" << llendl;
 	}
+
+#if LL_REF_COUNT_DEBUG
+	if(gAPRPoolp)
+	{
+		delete mMutexp ;
+	}
+#endif
 }
+
+#if LL_REF_COUNT_DEBUG
+void LLRefCount::ref() const
+{ 
+	if(mMutexp)
+	{
+		if(mMutexp->isLocked()) 
+		{
+			mCrashAtUnlock = TRUE ;
+			llerrs << "the mutex is locked by the thread: " << mLockedThreadID 
+				<< " Current thread: " << LLThread::currentID() << llendl ;
+		}
+
+		mMutexp->lock() ;
+		mLockedThreadID = LLThread::currentID() ;
+
+		mRef++; 
+
+		if(mCrashAtUnlock)
+		{
+			while(1); //crash here.
+		}
+		mMutexp->unlock() ;
+	}
+	else
+	{
+		mRef++; 
+	}
+} 
+
+S32 LLRefCount::unref() const
+{
+	if(mMutexp)
+	{
+		if(mMutexp->isLocked()) 
+		{
+			mCrashAtUnlock = TRUE ;
+			llerrs << "the mutex is locked by the thread: " << mLockedThreadID 
+				<< " Current thread: " << LLThread::currentID() << llendl ;
+		}
+
+		mMutexp->lock() ;
+		mLockedThreadID = LLThread::currentID() ;
+		
+		llassert(mRef >= 1);
+		if (0 == --mRef) 
+		{
+			if(mCrashAtUnlock)
+			{
+				while(1); //crash here.
+			}
+			mMutexp->unlock() ;
+
+			delete this; 
+			return 0;
+		}
+
+		if(mCrashAtUnlock)
+		{
+			while(1); //crash here.
+		}
+		mMutexp->unlock() ;
+		return mRef;
+	}
+	else
+	{
+		llassert(mRef >= 1);
+		if (0 == --mRef) 
+		{
+			delete this; 
+			return 0;
+		}
+		return mRef;
+	}
+}	
+#endif
diff --git a/indra/llcommon/llrefcount.h b/indra/llcommon/llrefcount.h
index 19f008b15c..8eb5d53f3f 100644
--- a/indra/llcommon/llrefcount.h
+++ b/indra/llcommon/llrefcount.h
@@ -28,6 +28,11 @@
 
 #include <boost/noncopyable.hpp>
 
+#define LL_REF_COUNT_DEBUG 0
+#if LL_REF_COUNT_DEBUG
+class LLMutex ;
+#endif
+
 //----------------------------------------------------------------------------
 // RefCount objects should generally only be accessed by way of LLPointer<>'s
 // see llthread.h for LLThreadSafeRefCount
@@ -43,12 +48,16 @@ protected:
 public:
 	LLRefCount();
 
-	void ref() const
+#if LL_REF_COUNT_DEBUG
+	void ref() const ;
+	S32 unref() const ;
+#else
+	inline void ref() const
 	{ 
 		mRef++; 
 	} 
 
-	S32 unref() const
+	inline S32 unref() const
 	{
 		llassert(mRef >= 1);
 		if (0 == --mRef) 
@@ -58,6 +67,7 @@ public:
 		}
 		return mRef;
 	}	
+#endif
 
 	//NOTE: when passing around a const LLRefCount object, this can return different results
 	// at different types, since mRef is mutable
@@ -68,6 +78,12 @@ public:
 
 private: 
 	mutable S32	mRef; 
+
+#if LL_REF_COUNT_DEBUG
+	LLMutex*  mMutexp ;
+	mutable U32  mLockedThreadID ;
+	mutable BOOL mCrashAtUnlock ; 
+#endif
 };
 
 #endif
diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp
index 10f460e8a6..f3cbfab77a 100644
--- a/indra/llcommon/llsdserialize.cpp
+++ b/indra/llcommon/llsdserialize.cpp
@@ -34,6 +34,12 @@
 #include <iostream>
 #include "apr_base64.h"
 
+#ifdef LL_STANDALONE
+# include <zlib.h>
+#else
+# include "zlib/zlib.h"  // for davep's dirty little zip functions
+#endif
+
 #if !LL_WINDOWS
 #include <netinet/in.h> // htonl & ntohl
 #endif
@@ -1983,3 +1989,185 @@ std::ostream& operator<<(std::ostream& s, const LLSD& llsd)
 	return s;
 }
 
+
+//dirty little zippers -- yell at davep if these are horrid
+
+//return a string containing gzipped bytes of binary serialized LLSD
+// VERY inefficient -- creates several copies of LLSD block in memory
+std::string zip_llsd(LLSD& data)
+{ 
+	std::stringstream llsd_strm;
+
+	LLSDSerialize::toBinary(data, llsd_strm);
+
+	const U32 CHUNK = 65536;
+
+	z_stream strm;
+	strm.zalloc = Z_NULL;
+	strm.zfree = Z_NULL;
+	strm.opaque = Z_NULL;
+
+	S32 ret = deflateInit(&strm, Z_BEST_COMPRESSION);
+	if (ret != Z_OK)
+	{
+		llwarns << "Failed to compress LLSD block." << llendl;
+		return std::string();
+	}
+
+	std::string source = llsd_strm.str();
+
+	U8 out[CHUNK];
+
+	strm.avail_in = source.size();
+	strm.next_in = (U8*) source.data();
+	U8* output = NULL;
+
+	U32 cur_size = 0;
+
+	U32 have = 0;
+
+	do
+	{
+		strm.avail_out = CHUNK;
+		strm.next_out = out;
+
+		ret = deflate(&strm, Z_FINISH);
+		if (ret == Z_OK || ret == Z_STREAM_END)
+		{ //copy result into output
+			if (strm.avail_out >= CHUNK)
+			{
+				llerrs << "WTF?" << llendl;
+			}
+
+			have = CHUNK-strm.avail_out;
+			output = (U8*) realloc(output, cur_size+have);
+			memcpy(output+cur_size, out, have);
+			cur_size += have;
+		}
+		else 
+		{
+			free(output);
+			llwarns << "Failed to compress LLSD block." << llendl;
+			return std::string();
+		}
+	}
+	while (ret == Z_OK);
+
+	std::string::size_type size = cur_size;
+
+	std::string result((char*) output, size);
+	deflateEnd(&strm);
+	free(output);
+
+#if 0 //verify results work with unzip_llsd
+	std::istringstream test(result);
+	LLSD test_sd;
+	if (!unzip_llsd(test_sd, test, result.size()))
+	{
+		llerrs << "Invalid compression result!" << llendl;
+	}
+#endif
+
+	return result;
+}
+
+//decompress a block of LLSD from provided istream
+// not very efficient -- creats a copy of decompressed LLSD block in memory
+// and deserializes from that copy using LLSDSerialize
+bool unzip_llsd(LLSD& data, std::istream& is, S32 size)
+{
+	U8* result = NULL;
+	U32 cur_size = 0;
+	z_stream strm;
+		
+	const U32 CHUNK = 65536;
+
+	U8 *in = new U8[size];
+	is.read((char*) in, size); 
+
+	U8 out[CHUNK];
+		
+	strm.zalloc = Z_NULL;
+	strm.zfree = Z_NULL;
+	strm.opaque = Z_NULL;
+	strm.avail_in = size;
+	strm.next_in = in;
+
+	S32 ret = inflateInit(&strm);
+
+	if (ret != Z_OK)
+	{
+		llerrs << "WTF?" << llendl;
+	}
+	
+	do
+	{
+		strm.avail_out = CHUNK;
+		strm.next_out = out;
+		ret = inflate(&strm, Z_NO_FLUSH);
+		if (ret == Z_STREAM_ERROR)
+		{
+			inflateEnd(&strm);
+			free(result);
+			delete [] in;
+			return false;
+		}
+		
+		switch (ret)
+		{
+		case Z_NEED_DICT:
+			ret = Z_DATA_ERROR;
+		case Z_DATA_ERROR:
+		case Z_MEM_ERROR:
+			inflateEnd(&strm);
+			free(result);
+			delete [] in;
+			return false;
+			break;
+		}
+
+		U32 have = CHUNK-strm.avail_out;
+
+		result = (U8*) realloc(result, cur_size + have);
+		memcpy(result+cur_size, out, have);
+		cur_size += have;
+
+	} while (ret == Z_OK);
+
+	inflateEnd(&strm);
+	delete [] in;
+
+	if (ret != Z_STREAM_END)
+	{
+		free(result);
+		return false;
+	}
+
+	//result now points to the decompressed LLSD block
+	{
+		std::string res_str((char*) result, cur_size);
+
+		std::string deprecated_header("<? LLSD/Binary ?>");
+
+		if (res_str.substr(0, deprecated_header.size()) == deprecated_header)
+		{
+			res_str = res_str.substr(deprecated_header.size()+1, cur_size);
+		}
+		cur_size = res_str.size();
+
+		std::istringstream istr(res_str);
+		
+		if (!LLSDSerialize::fromBinary(data, istr, cur_size))
+		{
+			llwarns << "Failed to unzip LLSD block" << llendl;
+			free(result);
+			return false;
+		}
+	}
+
+	free(result);
+	return true;
+}
+
+
+
diff --git a/indra/llcommon/llsdserialize.h b/indra/llcommon/llsdserialize.h
index 1f096b5254..99a3ea3cd4 100644
--- a/indra/llcommon/llsdserialize.h
+++ b/indra/llcommon/llsdserialize.h
@@ -790,4 +790,8 @@ public:
 	}
 };
 
+//dirty little zip functions -- yell at davep
+LL_COMMON_API std::string zip_llsd(LLSD& data);
+LL_COMMON_API bool unzip_llsd(LLSD& data, std::istream& is, S32 size);
+
 #endif // LL_LLSDSERIALIZE_H
diff --git a/indra/llcommon/llthread.cpp b/indra/llcommon/llthread.cpp
index 49d05ef411..b4617c5453 100644
--- a/indra/llcommon/llthread.cpp
+++ b/indra/llcommon/llthread.cpp
@@ -56,6 +56,21 @@
 // 
 //----------------------------------------------------------------------------
 
+#if !LL_DARWIN
+U32 ll_thread_local sThreadID = 0;
+#endif 
+
+U32 LLThread::sIDIter = 0;
+
+LL_COMMON_API void assert_main_thread()
+{
+	static U32 s_thread_id = LLThread::currentID();
+	if (LLThread::currentID() != s_thread_id)
+	{
+		llerrs << "Illegal execution outside main thread." << llendl;
+	}
+}
+
 //
 // Handed to the APR thread creation function
 //
@@ -63,10 +78,14 @@ void *APR_THREAD_FUNC LLThread::staticRun(apr_thread_t *apr_threadp, void *datap
 {
 	LLThread *threadp = (LLThread *)datap;
 
+#if !LL_DARWIN
+	sThreadID = threadp->mID;
+#endif
+
 	// Run the user supplied function
 	threadp->run();
 
-	llinfos << "LLThread::staticRun() Exiting: " << threadp->mName << llendl;
+	//llinfos << "LLThread::staticRun() Exiting: " << threadp->mName << llendl;
 	
 	// We're done with the run function, this thread is done executing now.
 	threadp->mStatus = STOPPED;
@@ -81,6 +100,8 @@ LLThread::LLThread(const std::string& name, apr_pool_t *poolp) :
 	mAPRThreadp(NULL),
 	mStatus(STOPPED)
 {
+	mID = ++sIDIter;
+
 	// Thread creation probably CAN be paranoid about APR being initialized, if necessary
 	if (poolp)
 	{
@@ -121,7 +142,7 @@ void LLThread::shutdown()
 			// First, set the flag that indicates that we're ready to die
 			setQuitting();
 
-			llinfos << "LLThread::~LLThread() Killing thread " << mName << " Status: " << mStatus << llendl;
+			//llinfos << "LLThread::~LLThread() Killing thread " << mName << " Status: " << mStatus << llendl;
 			// Now wait a bit for the thread to exit
 			// It's unclear whether I should even bother doing this - this destructor
 			// should netver get called unless we're already stopped, really...
@@ -143,7 +164,7 @@ void LLThread::shutdown()
 		if (!isStopped())
 		{
 			// This thread just wouldn't stop, even though we gave it time
-			llwarns << "LLThread::~LLThread() exiting thread before clean exit!" << llendl;
+			//llwarns << "LLThread::~LLThread() exiting thread before clean exit!" << llendl;
 			// Put a stake in its heart.
 			apr_thread_exit(mAPRThreadp, -1);
 			return;
@@ -283,7 +304,7 @@ void LLThread::wakeLocked()
 //============================================================================
 
 LLMutex::LLMutex(apr_pool_t *poolp) :
-	mAPRMutexp(NULL)
+	mAPRMutexp(NULL), mCount(0), mLockingThread(NO_THREAD)
 {
 	//if (poolp)
 	//{
@@ -315,7 +336,18 @@ LLMutex::~LLMutex()
 
 void LLMutex::lock()
 {
+#if LL_DARWIN
+	if (mLockingThread == LLThread::currentID())
+#else
+	if (mLockingThread == sThreadID)
+#endif
+	{ //redundant lock
+		mCount++;
+		return;
+	}
+	
 	apr_thread_mutex_lock(mAPRMutexp);
+	
 #if MUTEX_DEBUG
 	// Have to have the lock before we can access the debug info
 	U32 id = LLThread::currentID();
@@ -323,10 +355,22 @@ void LLMutex::lock()
 		llerrs << "Already locked in Thread: " << id << llendl;
 	mIsLocked[id] = TRUE;
 #endif
+
+#if LL_DARWIN
+	mLockingThread = LLThread::currentID();
+#else
+	mLockingThread = sThreadID;
+#endif
 }
 
 void LLMutex::unlock()
 {
+	if (mCount > 0)
+	{ //not the root unlock
+		mCount--;
+		return;
+	}
+	
 #if MUTEX_DEBUG
 	// Access the debug info while we have the lock
 	U32 id = LLThread::currentID();
@@ -334,6 +378,8 @@ void LLMutex::unlock()
 		llerrs << "Not locked in Thread: " << id << llendl;	
 	mIsLocked[id] = FALSE;
 #endif
+
+	mLockingThread = NO_THREAD;
 	apr_thread_mutex_unlock(mAPRMutexp);
 }
 
@@ -351,6 +397,11 @@ bool LLMutex::isLocked()
 	}
 }
 
+U32 LLMutex::lockingThread() const
+{
+	return mLockingThread;
+}
+
 //============================================================================
 
 LLCondition::LLCondition(apr_pool_t *poolp) :
@@ -371,6 +422,10 @@ LLCondition::~LLCondition()
 
 void LLCondition::wait()
 {
+	if (!isLocked())
+	{ //mAPRMutexp MUST be locked before calling apr_thread_cond_wait
+		apr_thread_mutex_lock(mAPRMutexp);
+	}
 	apr_thread_cond_wait(mAPRCondp, mAPRMutexp);
 }
 
diff --git a/indra/llcommon/llthread.h b/indra/llcommon/llthread.h
index f1c6cd75af..40291a2569 100644
--- a/indra/llcommon/llthread.h
+++ b/indra/llcommon/llthread.h
@@ -35,8 +35,17 @@ class LLThread;
 class LLMutex;
 class LLCondition;
 
+#if LL_WINDOWS
+#define ll_thread_local __declspec(thread)
+#else
+#define ll_thread_local __thread
+#endif
+
 class LL_COMMON_API LLThread
 {
+private:
+	static U32 sIDIter;
+
 public:
 	typedef enum e_thread_status
 	{
@@ -77,6 +86,8 @@ public:
 	apr_pool_t *getAPRPool() { return mAPRPoolp; }
 	LLVolatileAPRPool* getLocalAPRFilePool() { return mLocalAPRFilePoolp ; }
 
+	U32 getID() const { return mID; }
+
 private:
 	BOOL				mPaused;
 	
@@ -91,6 +102,7 @@ protected:
 	apr_pool_t			*mAPRPoolp;
 	BOOL				mIsLocalPool;
 	EThreadStatus		mStatus;
+	U32					mID;
 
 	//a local apr_pool for APRFile operations in this thread. If it exists, LLAPRFile::sAPRFilePoolp should not be used.
 	//Note: this pool is used by APRFile ONLY, do NOT use it for any other purposes.
@@ -128,17 +140,27 @@ protected:
 class LL_COMMON_API LLMutex
 {
 public:
+	typedef enum
+	{
+		NO_THREAD = 0xFFFFFFFF
+	} e_locking_thread;
+
 	LLMutex(apr_pool_t *apr_poolp); // NULL pool constructs a new pool for the mutex
 	virtual ~LLMutex();
 	
 	void lock();		// blocks
 	void unlock();
 	bool isLocked(); 	// non-blocking, but does do a lock/unlock so not free
+	U32 lockingThread() const; //get ID of locking thread
 	
 protected:
 	apr_thread_mutex_t *mAPRMutexp;
+	mutable U32			mCount;
+	mutable U32			mLockingThread;
+	
 	apr_pool_t			*mAPRPoolp;
 	BOOL				mIsLocalPool;
+	
 #if MUTEX_DEBUG
 	std::map<U32, BOOL> mIsLocked;
 #endif
diff --git a/indra/llcommon/stdenums.h b/indra/llcommon/stdenums.h
index 9f86de124e..556eff8370 100644
--- a/indra/llcommon/stdenums.h
+++ b/indra/llcommon/stdenums.h
@@ -49,7 +49,8 @@ enum EDragAndDropType
 	DAD_ANIMATION		= 12,
 	DAD_GESTURE			= 13,
 	DAD_LINK			= 14,
-	DAD_COUNT			= 15,   // number of types in this enum
+	DAD_MESH           		= 15,
+	DAD_COUNT			= 16,   // number of types in this enum
 };
 
 // Reasons for drags to be denied.