From 056f0983029000041555ca53c61cbe5e8689cae9 Mon Sep 17 00:00:00 2001 From: Nicky Date: Fri, 22 Apr 2016 12:58:51 +0200 Subject: Windows x64: Cannot use inline assembly. (transplanted from ee32840fc591f5529a0b544243e7b4146eb8f531) --- indra/llcommon/llfasttimer.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'indra/llcommon/llfasttimer.h') diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index f56e5596f5..0336f9d0e9 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -91,6 +91,7 @@ public: static U32 getCPUClockCount32() { U32 ret_val; +#if !defined(_M_AMD64) __asm { _emit 0x0f @@ -100,6 +101,11 @@ public: or eax, edx mov dword ptr [ret_val], eax } +#else + unsigned __int64 val = __rdtsc(); + val = val >> 8; + ret_val = static_cast(val); +#endif return ret_val; } @@ -107,6 +113,7 @@ public: static U64 getCPUClockCount64() { U64 ret_val; +#if !defined(_M_AMD64) __asm { _emit 0x0f @@ -116,6 +123,9 @@ public: mov dword ptr [ret_val+4], edx mov dword ptr [ret_val], eax } +#else + ret_val = static_cast( __rdtsc() ); +#endif return ret_val; } -- cgit v1.2.3 From c87d24ac71c662ab37b6b937f92d960c6d8d092f Mon Sep 17 00:00:00 2001 From: Nicky Date: Fri, 22 Apr 2016 23:59:28 +0200 Subject: Fasttimers: Windows) Always use the __rdtsc() intrinsic rather than inline assembly. Linux/OSX) The rtdsc assembly intruction is clobbering EAX and EDX, the snippet was not protecting EDX accordingly. (transplanted from 6307b134f821390367d4c86a03b9a492ac7ed282) --- indra/llcommon/llfasttimer.h | 44 ++++++++------------------------------------ 1 file changed, 8 insertions(+), 36 deletions(-) (limited to 'indra/llcommon/llfasttimer.h') diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index 0336f9d0e9..2024d707da 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -90,43 +90,15 @@ public: #if LL_FASTTIMER_USE_RDTSC static U32 getCPUClockCount32() { - U32 ret_val; -#if !defined(_M_AMD64) - __asm - { - _emit 0x0f - _emit 0x31 - shr eax,8 - shl edx,24 - or eax, edx - mov dword ptr [ret_val], eax - } -#else unsigned __int64 val = __rdtsc(); val = val >> 8; - ret_val = static_cast(val); -#endif - return ret_val; + return static_cast(val); } // return full timer value, *not* shifted by 8 bits static U64 getCPUClockCount64() { - U64 ret_val; -#if !defined(_M_AMD64) - __asm - { - _emit 0x0f - _emit 0x31 - mov eax,eax - mov edx,edx - mov dword ptr [ret_val+4], edx - mov dword ptr [ret_val], eax - } -#else - ret_val = static_cast( __rdtsc() ); -#endif - return ret_val; + return static_cast( __rdtsc() ); } #else @@ -183,16 +155,16 @@ public: // Mac+Linux+Solaris FAST x86 implementation of CPU clock static U32 getCPUClockCount32() { - U64 x; - __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); - return (U32)(x >> 8); + U32 low(0),high(0); + __asm__ volatile (".byte 0x0f, 0x31": "=a"(low), "=d"(high) ); + return (low>>8) | (high<<24); } static U64 getCPUClockCount64() { - U64 x; - __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); - return x; + U32 low(0),high(0); + __asm__ volatile (".byte 0x0f, 0x31": "=a"(low), "=d"(high) ); + return (U64)low | ( ((U64)high) << 32); } #endif -- cgit v1.2.3