|
|
|
|
@@ -29,33 +29,22 @@
|
|
|
|
|
* COMPLETENESS OR PERFORMANCE.
|
|
|
|
|
* $/LicenseInfo$
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "linden_common.h"
|
|
|
|
|
|
|
|
|
|
#include "llfasttimer.h"
|
|
|
|
|
|
|
|
|
|
#include "llmemory.h"
|
|
|
|
|
#include "llprocessor.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if LL_WINDOWS
|
|
|
|
|
#define WIN32_LEAN_AND_MEAN
|
|
|
|
|
#include <windows.h>
|
|
|
|
|
#include "lltimer.h"
|
|
|
|
|
#elif LL_LINUX || LL_SOLARIS
|
|
|
|
|
#include <sys/time.h>
|
|
|
|
|
#include <sched.h>
|
|
|
|
|
#include "lltimer.h"
|
|
|
|
|
#elif LL_DARWIN
|
|
|
|
|
#include <sys/time.h>
|
|
|
|
|
#include "lltimer.h" // get_clock_count()
|
|
|
|
|
#else
|
|
|
|
|
#error "architecture not supported"
|
|
|
|
|
#endif
|
|
|
|
|
#include "lltimer.h"
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
// statics
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Starts out as FTM_OTHER (presumably the timer type currently being charged -- confirm against the class header).
LLFastTimer::EFastTimerType LLFastTimer::sCurType = LLFastTimer::FTM_OTHER;
|
|
|
|
|
// Depth counter, initially 0 (presumably the current nesting level / index into sStart -- confirm).
int LLFastTimer::sCurDepth = 0;
|
|
|
|
|
// Array of FTM_MAX_DEPTH U64 slots (presumably one start count per nesting level -- confirm).
U64 LLFastTimer::sStart[LLFastTimer::FTM_MAX_DEPTH];
|
|
|
|
|
@@ -70,44 +59,12 @@ S32 LLFastTimer::sLastFrameIndex = -1;
|
|
|
|
|
// NOTE(review): meaning inferred from the name only -- presumably nonzero while history recording is paused; confirm.
int LLFastTimer::sPauseHistory = 0;
|
|
|
|
|
// NOTE(review): meaning inferred from the name only -- presumably a request to clear the recorded history; confirm.
int LLFastTimer::sResetHistory = 0;
|
|
|
|
|
|
|
|
|
|
// Compile-time switch tested by the "#if USE_RDTSC" checks later in this file.
// With the value 0 the Windows code takes the QueryPerformanceCounter branches
// instead of the inline-assembly RDTSC ones.
#define USE_RDTSC 0
|
|
|
|
|
// NOTE(review): sClockResolution is defined here AND again in both arms of the
// #if/#else that follows. Only one definition can exist in a translation unit;
// this looks like old and new lines of a diff shown together. Confirm which
// definition is the intended one before building.
U64 LLFastTimer::sClockResolution = calc_clock_frequency(50U); // Resolution of get_clock_count()
|
|
|
|
|
|
|
|
|
|
#if LL_LINUX || LL_SOLARIS
|
|
|
|
|
// Linux/Solaris: the clock_gettime-based counter in this file adds tv_nsec
// unscaled, so its ticks are nanoseconds.
U64 LLFastTimer::sClockResolution = 1000000000; // 1e9, Nanosecond resolution
|
|
|
|
|
#else
|
|
|
|
|
// All other platforms: microsecond ticks.
U64 LLFastTimer::sClockResolution = 1000000; // 1e6, Microsecond resolution
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//static
|
|
|
|
|
#if (LL_DARWIN || LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
|
|
|
|
|
// Tick rate of the *32-bit* timer on non-x86 Darwin/Linux/Solaris builds.
// The 32-bit counter is the 64-bit counter shifted right by 8 bits, so its
// rate is the full clock resolution divided by 256.
U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
{
	const U64 full_rate = sClockResolution;
	return full_rate >> 8;
}
|
|
|
|
|
#else // windows or x86-mac or x86-linux or x86-solaris
|
|
|
|
|
// Tick rate of the *32-bit* timer on Windows and on x86 Mac/Linux/Solaris.
// The underlying frequency is looked up once and cached in a function-local
// static; every call returns that frequency divided by 256.
U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
{
#if !USE_RDTSC && LL_WINDOWS
	// If we're not using RDTSC, each fasttimer tick is just a performance counter tick.
	// The clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency()) is
	// deliberately not redefined, since that would change displayed MHz stats for CPUs.
	static U64 sCPUClockFrequency;
	static bool sFrequencyQueried = false;
	if (!sFrequencyQueried)
	{
		// Ask the OS once for the performance-counter frequency.
		QueryPerformanceFrequency((LARGE_INTEGER*)&sCPUClockFrequency);
		sFrequencyQueried = true;
	}
#else
	// getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz
	static U64 sCPUClockFrequency = U64(LLProcessorInfo().getCPUFrequency()*1000000.0);
#endif

	// we drop the low-order byte in our timers, so report a lower frequency
	return sCPUClockFrequency >> 8;
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
void LLFastTimer::reset()
|
|
|
|
|
{
|
|
|
|
|
@@ -162,139 +119,17 @@ void LLFastTimer::reset()
|
|
|
|
|
// Important note: These implementations must be FAST!
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if LL_WINDOWS
|
|
|
|
|
//
|
|
|
|
|
// Windows implementation of CPU clock
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// NOTE: put back in when we aren't using platform sdk anymore
|
|
|
|
|
//
|
|
|
|
|
// because MS has different signatures for these functions in winnt.h
|
|
|
|
|
// need to rename them to avoid conflicts
|
|
|
|
|
//#define _interlockedbittestandset _renamed_interlockedbittestandset
|
|
|
|
|
//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
|
|
|
|
|
//#include <intrin.h>
|
|
|
|
|
//#undef _interlockedbittestandset
|
|
|
|
|
//#undef _interlockedbittestandreset
|
|
|
|
|
|
|
|
|
|
//inline U32 LLFastTimer::getCPUClockCount32()
|
|
|
|
|
//{
|
|
|
|
|
// U64 time_stamp = __rdtsc();
|
|
|
|
|
// return (U32)(time_stamp >> 8);
|
|
|
|
|
//}
|
|
|
|
|
//
|
|
|
|
|
//// return full timer value, *not* shifted by 8 bits
|
|
|
|
|
//inline U64 LLFastTimer::getCPUClockCount64()
|
|
|
|
|
//{
|
|
|
|
|
// return __rdtsc();
|
|
|
|
|
//}
|
|
|
|
|
|
|
|
|
|
// shift off lower 8 bits for lower resolution but longer term timing
|
|
|
|
|
// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
|
|
|
|
|
#if USE_RDTSC
|
|
|
|
|
// Windows RDTSC variant of the 32-bit fast-timer tick (MSVC inline assembly).
// Returns the low 32 bits of (time-stamp counter >> 8).
U32 LLFastTimer::getCPUClockCount32()
|
|
|
|
|
{
|
|
|
|
|
U32 ret_val;
|
|
|
|
|
// The two _emit bytes (0x0f 0x31) are the opcode of RDTSC, which leaves the
// counter in EDX:EAX. The shr/shl/or sequence then builds the low 32 bits of
// (EDX:EAX >> 8) in EAX: the low 24 bits of the result are bits 8..31 of EAX,
// and the top 8 bits are the low byte of EDX.
__asm
|
|
|
|
|
{
|
|
|
|
|
_emit 0x0f
|
|
|
|
|
_emit 0x31
|
|
|
|
|
shr eax,8
|
|
|
|
|
shl edx,24
|
|
|
|
|
or eax, edx
|
|
|
|
|
mov dword ptr [ret_val], eax
|
|
|
|
|
}
|
|
|
|
|
return ret_val;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// return full timer value, *not* shifted by 8 bits
|
|
|
|
|
// Windows RDTSC variant of the 64-bit counter (MSVC inline assembly).
// Returns the full time-stamp counter value, not shifted.
U64 LLFastTimer::getCPUClockCount64()
|
|
|
|
|
{
|
|
|
|
|
U64 ret_val;
|
|
|
|
|
// The two _emit bytes (0x0f 0x31) are the opcode of RDTSC, which leaves the
// counter in EDX:EAX. The "mov eax,eax" / "mov edx,edx" instructions copy a
// register onto itself and do not change the value. The last two stores
// write EDX to the upper 4 bytes of ret_val and EAX to the lower 4 bytes.
__asm
|
|
|
|
|
{
|
|
|
|
|
_emit 0x0f
|
|
|
|
|
_emit 0x31
|
|
|
|
|
mov eax,eax
|
|
|
|
|
mov edx,edx
|
|
|
|
|
mov dword ptr [ret_val+4], edx
|
|
|
|
|
mov dword ptr [ret_val], eax
|
|
|
|
|
}
|
|
|
|
|
return ret_val;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Clock-source label for the Windows build that reads the counter with RDTSC.
std::string LLFastTimer::sClockType("rdtsc");
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
|
|
|
|
|
// On Windows these use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures.
|
|
|
|
|
// Windows (non-RDTSC) variant of the 32-bit fast-timer tick.
// Returns the low 32 bits of (get_clock_count() >> 8): the low byte of the
// raw count is dropped, trading resolution for a longer span before the
// 32-bit value wraps.
U32 LLFastTimer::getCPUClockCount32()
{
	// Single return with an explicit narrowing cast; the previous text had a
	// second, unreachable return that relied on an implicit U64 -> U32
	// conversion.
	return static_cast<U32>(get_clock_count() >> 8);
}
|
|
|
|
|
|
|
|
|
|
// Windows (non-RDTSC) variant of the 64-bit counter.
// Returns get_clock_count() unchanged -- the full value, *not* shifted by 8 bits.
U64 LLFastTimer::getCPUClockCount64()
{
	const U64 raw_ticks = get_clock_count();
	return raw_ticks;
}
|
|
|
|
|
|
|
|
|
|
// Clock-source label for the Windows build that does not use RDTSC.
std::string LLFastTimer::sClockType("QueryPerformanceCounter");
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
|
|
|
|
|
//
|
|
|
|
|
// Linux and Solaris implementation of CPU clock - non-x86.
|
|
|
|
|
// This is accurate but SLOW! Only use out of desperation.
|
|
|
|
|
//
|
|
|
|
|
// Try to use the MONOTONIC clock if available, this is a constant time counter
|
|
|
|
|
// with nanosecond resolution (but not necessarily accuracy) and attempts are
|
|
|
|
|
// made to synchronize this value between cores at kernel start. It should not
|
|
|
|
|
// be affected by CPU frequency. If not available use the REALTIME clock, but
|
|
|
|
|
// this may be affected by NTP adjustments or other user activity affecting
|
|
|
|
|
// the system time.
|
|
|
|
|
// Non-x86 Linux/Solaris variant of the 64-bit counter, built on clock_gettime().
// Uses CLOCK_MONOTONIC when it exists at build time and works at run time,
// otherwise falls back to CLOCK_REALTIME.
// Returns tv_sec * sClockResolution + tv_nsec. tv_nsec is added unscaled, so
// the result is a consistent tick count only when sClockResolution is 1e9
// (nanosecond ticks). If no clock can be read at all, returns 0.
U64 LLFastTimer::getCPUClockCount64()
{
	// Zero-initialized so that a failure of every clock below yields a
	// defined value instead of reading an indeterminate timespec.
	struct timespec tp = { 0, 0 };

#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
	if (-1 == clock_gettime(CLOCK_MONOTONIC, &tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
#endif
	{
		if (-1 == clock_gettime(CLOCK_REALTIME, &tp))
		{
			// Neither clock is usable; do not trust whatever the failed
			// calls may have left in tp.
			tp.tv_sec = 0;
			tp.tv_nsec = 0;
		}
	}

	return (static_cast<U64>(tp.tv_sec) * LLFastTimer::sClockResolution) + static_cast<U64>(tp.tv_nsec);
}
|
|
|
|
|
|
|
|
|
|
// Non-x86 Linux/Solaris variant of the 32-bit fast-timer tick.
// Returns the low 32 bits of (getCPUClockCount64() >> 8).
U32 LLFastTimer::getCPUClockCount32()
{
	const U64 full_count = LLFastTimer::getCPUClockCount64();
	const U64 shifted_count = full_count >> 8; // drop the low byte
	return (U32)shifted_count;
}
|
|
|
|
|
|
|
|
|
|
// Clock-source label for the non-x86 Linux/Solaris build, which reads time via clock_gettime().
std::string LLFastTimer::sClockType("clock_gettime");
|
|
|
|
|
|
|
|
|
|
#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
|
|
|
|
|
//
|
|
|
|
|
// Mac+Linux+Solaris FAST x86 implementation of CPU clock
|
|
|
|
|
// x86 Mac/Linux/Solaris variant of the 32-bit fast-timer tick (GCC inline asm).
// Returns the low 32 bits of (time-stamp counter >> 8).
U32 LLFastTimer::getCPUClockCount32()
{
	// The bytes 0x0f 0x31 are the RDTSC opcode; the instruction returns the
	// counter split across EDX (high 32 bits) and EAX (low 32 bits).
	// The halves are read through separate "=a" / "=d" outputs because the
	// combined "=A" constraint means the EDX:EAX pair only on 32-bit x86.
	// On x86-64 it names a single 64-bit register, so the high half of the
	// counter would be lost.
	U32 lo = 0;
	U32 hi = 0;
	__asm__ volatile (".byte 0x0f, 0x31" : "=a"(lo), "=d"(hi));

	const U64 x = (static_cast<U64>(hi) << 32) | static_cast<U64>(lo);
	return static_cast<U32>(x >> 8);
}
|
|
|
|
|
|
|
|
|
|
// x86 Mac/Linux/Solaris variant of the 64-bit counter (GCC inline asm).
// Returns the full time-stamp counter value, not shifted.
U64 LLFastTimer::getCPUClockCount64()
{
	// The bytes 0x0f 0x31 are the RDTSC opcode; the instruction returns the
	// counter split across EDX (high 32 bits) and EAX (low 32 bits).
	// The halves are read through separate "=a" / "=d" outputs because the
	// combined "=A" constraint means the EDX:EAX pair only on 32-bit x86.
	// On x86-64 it names a single 64-bit register, so the high half of the
	// counter would be lost.
	U32 lo = 0;
	U32 hi = 0;
	__asm__ volatile (".byte 0x0f, 0x31" : "=a"(lo), "=d"(hi));

	return (static_cast<U64>(hi) << 32) | static_cast<U64>(lo);
}
|
|
|
|
|
|
|
|
|
|
// Clock-source label for the x86 Mac/Linux/Solaris build, which reads the counter with RDTSC.
std::string LLFastTimer::sClockType("rdtsc");
|
|
|
|
|
#endif
|