diff options
Diffstat (limited to 'indra/llrender')
-rw-r--r-- | indra/llrender/llgl.cpp | 14 | ||||
-rw-r--r-- | indra/llrender/llgl.h | 1 | ||||
-rw-r--r-- | indra/llrender/llglheaders.h | 16 | ||||
-rw-r--r-- | indra/llrender/llglslshader.cpp | 228 | ||||
-rw-r--r-- | indra/llrender/llglslshader.h | 27 | ||||
-rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 14 |
6 files changed, 297 insertions, 3 deletions
diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp index 5429489075..81a97c7661 100644 --- a/indra/llrender/llgl.cpp +++ b/indra/llrender/llgl.cpp @@ -216,6 +216,11 @@ PFNGLGETQUERYIVARBPROC glGetQueryivARB = NULL; PFNGLGETQUERYOBJECTIVARBPROC glGetQueryObjectivARB = NULL; PFNGLGETQUERYOBJECTUIVARBPROC glGetQueryObjectuivARB = NULL; +// GL_ARB_timer_query +PFNGLQUERYCOUNTERPROC glQueryCounter = NULL; +PFNGLGETQUERYOBJECTI64VPROC glGetQueryObjecti64v = NULL; +PFNGLGETQUERYOBJECTUI64VPROC glGetQueryObjectui64v = NULL; + // GL_ARB_point_parameters PFNGLPOINTPARAMETERFARBPROC glPointParameterfARB = NULL; PFNGLPOINTPARAMETERFVARBPROC glPointParameterfvARB = NULL; @@ -421,6 +426,7 @@ LLGLManager::LLGLManager() : mHasFragmentShader(FALSE), mNumTextureImageUnits(0), mHasOcclusionQuery(FALSE), + mHasTimerQuery(FALSE), mHasOcclusionQuery2(FALSE), mHasPointParameters(FALSE), mHasDrawBuffers(FALSE), @@ -956,6 +962,7 @@ void LLGLManager::initExtensions() mHasARBEnvCombine = ExtensionExists("GL_ARB_texture_env_combine", gGLHExts.mSysExts); mHasCompressedTextures = glh_init_extensions("GL_ARB_texture_compression"); mHasOcclusionQuery = ExtensionExists("GL_ARB_occlusion_query", gGLHExts.mSysExts); + mHasTimerQuery = ExtensionExists("GL_ARB_timer_query", gGLHExts.mSysExts); mHasOcclusionQuery2 = ExtensionExists("GL_ARB_occlusion_query2", gGLHExts.mSysExts); mHasVertexBufferObject = ExtensionExists("GL_ARB_vertex_buffer_object", gGLHExts.mSysExts); mHasVertexArrayObject = ExtensionExists("GL_ARB_vertex_array_object", gGLHExts.mSysExts); @@ -1271,6 +1278,13 @@ void LLGLManager::initExtensions() glGetQueryObjectivARB = (PFNGLGETQUERYOBJECTIVARBPROC)GLH_EXT_GET_PROC_ADDRESS("glGetQueryObjectivARB"); glGetQueryObjectuivARB = (PFNGLGETQUERYOBJECTUIVARBPROC)GLH_EXT_GET_PROC_ADDRESS("glGetQueryObjectuivARB"); } + if (mHasTimerQuery) + { + llinfos << "initExtensions() TimerQuery-related procs..." << llendl; + glQueryCounter = (PFNGLQUERYCOUNTERPROC) GLH_EXT_GET_PROC_ADDRESS("glQueryCounter"); + glGetQueryObjecti64v = (PFNGLGETQUERYOBJECTI64VPROC) GLH_EXT_GET_PROC_ADDRESS("glGetQueryObjecti64v"); + glGetQueryObjectui64v = (PFNGLGETQUERYOBJECTUI64VPROC) GLH_EXT_GET_PROC_ADDRESS("glGetQueryObjectui64v"); + } if (mHasPointParameters) { llinfos << "initExtensions() PointParameters-related procs..." << llendl; diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h index 767d49cbf2..60597fd090 100644 --- a/indra/llrender/llgl.h +++ b/indra/llrender/llgl.h @@ -98,6 +98,7 @@ public: BOOL mHasFragmentShader; S32 mNumTextureImageUnits; BOOL mHasOcclusionQuery; + BOOL mHasTimerQuery; BOOL mHasOcclusionQuery2; BOOL mHasPointParameters; BOOL mHasDrawBuffers; diff --git a/indra/llrender/llglheaders.h b/indra/llrender/llglheaders.h index 509de51f4d..dace572953 100644 --- a/indra/llrender/llglheaders.h +++ b/indra/llrender/llglheaders.h @@ -116,6 +116,11 @@ extern PFNGLGETQUERYIVARBPROC glGetQueryivARB; extern PFNGLGETQUERYOBJECTIVARBPROC glGetQueryObjectivARB; extern PFNGLGETQUERYOBJECTUIVARBPROC glGetQueryObjectuivARB; +// GL_ARB_timer_query +extern PFNGLQUERYCOUNTERPROC glQueryCounter; +extern PFNGLGETQUERYOBJECTI64VPROC glGetQueryObjecti64v; +extern PFNGLGETQUERYOBJECTUI64VPROC glGetQueryObjectui64v; + // GL_ARB_point_parameters extern PFNGLPOINTPARAMETERFARBPROC glPointParameterfARB; extern PFNGLPOINTPARAMETERFVARBPROC glPointParameterfvARB; @@ -378,6 +383,11 @@ extern PFNGLGETQUERYIVARBPROC glGetQueryivARB; extern PFNGLGETQUERYOBJECTIVARBPROC glGetQueryObjectivARB; extern PFNGLGETQUERYOBJECTUIVARBPROC glGetQueryObjectuivARB; +// GL_ARB_timer_query +extern PFNGLQUERYCOUNTERPROC glQueryCounter; +extern PFNGLGETQUERYOBJECTI64VPROC glGetQueryObjecti64v; +extern PFNGLGETQUERYOBJECTUI64VPROC glGetQueryObjectui64v; + // GL_ARB_point_parameters extern PFNGLPOINTPARAMETERFARBPROC glPointParameterfARB; extern PFNGLPOINTPARAMETERFVARBPROC glPointParameterfvARB; @@ -619,6 +629,12 @@ extern PFNGLGETQUERYIVARBPROC glGetQueryivARB; extern PFNGLGETQUERYOBJECTIVARBPROC glGetQueryObjectivARB; extern PFNGLGETQUERYOBJECTUIVARBPROC glGetQueryObjectuivARB; +// GL_ARB_timer_query +extern PFNGLQUERYCOUNTERPROC glQueryCounter; +extern PFNGLGETQUERYOBJECTI64VPROC glGetQueryObjecti64v; +extern PFNGLGETQUERYOBJECTUI64VPROC glGetQueryObjectui64v; + + // GL_ARB_point_parameters extern PFNGLPOINTPARAMETERFARBPROC glPointParameterfARB; extern PFNGLPOINTPARAMETERFVARBPROC glPointParameterfvARB; diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp index 39ad9b9e9b..191c9862f0 100644 --- a/indra/llrender/llglslshader.cpp +++ b/indra/llrender/llglslshader.cpp @@ -53,6 +53,12 @@ GLhandleARB LLGLSLShader::sCurBoundShader = 0; LLGLSLShader* LLGLSLShader::sCurBoundShaderPtr = NULL; S32 LLGLSLShader::sIndexedTextureChannels = 0; bool LLGLSLShader::sNoFixedFunction = false; +bool LLGLSLShader::sProfileEnabled = false; +std::set<LLGLSLShader*> LLGLSLShader::sInstances; +U64 LLGLSLShader::sTotalTimeElapsed = 0; +U32 LLGLSLShader::sTotalTrianglesDrawn = 0; +U64 LLGLSLShader::sTotalSamplesDrawn = 0; +U32 LLGLSLShader::sTotalDrawCalls = 0; //UI shader -- declared here so llui_libtest will link properly LLGLSLShader gUIProgram; @@ -87,19 +93,188 @@ LLShaderFeatures::LLShaderFeatures() //=============================== // LLGLSL Shader implementation //=============================== + +//static +void LLGLSLShader::initProfile() +{ + sProfileEnabled = true; + sTotalTimeElapsed = 0; + sTotalTrianglesDrawn = 0; + sTotalSamplesDrawn = 0; + sTotalDrawCalls = 0; + + for (std::set<LLGLSLShader*>::iterator iter = sInstances.begin(); iter != sInstances.end(); ++iter) + { + (*iter)->clearStats(); + } +} + + +struct LLGLSLShaderCompareTimeElapsed +{ + bool operator()(const LLGLSLShader* const& lhs, const LLGLSLShader* const& rhs) + { + return lhs->mTimeElapsed < rhs->mTimeElapsed; + } +}; + +//static +void LLGLSLShader::finishProfile() +{ + sProfileEnabled = false; + + std::vector<LLGLSLShader*> sorted; + + for (std::set<LLGLSLShader*>::iterator iter = sInstances.begin(); iter != sInstances.end(); ++iter) + { + sorted.push_back(*iter); + } + + std::sort(sorted.begin(), sorted.end(), LLGLSLShaderCompareTimeElapsed()); + + for (std::vector<LLGLSLShader*>::iterator iter = sorted.begin(); iter != sorted.end(); ++iter) + { + (*iter)->dumpStats(); + } + + llinfos << "-----------------------------------" << llendl; + llinfos << "Total rendering time: " << llformat("%.4f ms", sTotalTimeElapsed/1000000.f) << llendl; + llinfos << "Total samples drawn: " << llformat("%.4f million", sTotalSamplesDrawn/1000000.f) << llendl; + llinfos << "Total triangles drawn: " << llformat("%.3f million", sTotalTrianglesDrawn/1000000.f) << llendl; +} + +void LLGLSLShader::clearStats() +{ + mTrianglesDrawn = 0; + mTimeElapsed = 0; + mSamplesDrawn = 0; + mDrawCalls = 0; +} + +void LLGLSLShader::dumpStats() +{ + if (mDrawCalls > 0) + { + llinfos << "=============================================" << llendl; + llinfos << mName << llendl; + for (U32 i = 0; i < mShaderFiles.size(); ++i) + { + llinfos << mShaderFiles[i].first << llendl; + } + llinfos << "=============================================" << llendl; + + F32 ms = mTimeElapsed/1000000.f; + F32 seconds = ms/1000.f; + + F32 pct_tris = (F32) mTrianglesDrawn/(F32)sTotalTrianglesDrawn*100.f; + F32 tris_sec = (F32) (mTrianglesDrawn/1000000.0); + tris_sec /= seconds; + + F32 pct_samples = (F32) ((F64)mSamplesDrawn/(F64)sTotalSamplesDrawn)*100.f; + F32 samples_sec = (F32) mSamplesDrawn/1000000000.0; + samples_sec /= seconds; + + F32 pct_calls = (F32) mDrawCalls/(F32)sTotalDrawCalls*100.f; + U32 avg_batch = mTrianglesDrawn/mDrawCalls; + + llinfos << "Triangles Drawn: " << mTrianglesDrawn << " " << llformat("(%.2f pct of total, %.3f million/sec)", pct_tris, tris_sec ) << llendl; + llinfos << "Draw Calls: " << mDrawCalls << " " << llformat("(%.2f pct of total, avg %d tris/call)", pct_calls, avg_batch) << llendl; + llinfos << "SamplesDrawn: " << mSamplesDrawn << " " << llformat("(%.2f pct of total, %.3f billion/sec)", pct_samples, samples_sec) << llendl; + llinfos << "Time Elapsed: " << mTimeElapsed << " " << llformat("(%.2f pct of total, %.5f ms)\n", (F32) ((F64)mTimeElapsed/(F64)sTotalTimeElapsed)*100.f, ms) << llendl; + } +} + +//static +void LLGLSLShader::startProfile() +{ + if (sProfileEnabled && sCurBoundShaderPtr) + { + sCurBoundShaderPtr->placeProfileQuery(); + } + +} + +//static +void LLGLSLShader::stopProfile(U32 count, U32 mode) +{ + if (sProfileEnabled) + { + sCurBoundShaderPtr->readProfileQuery(count, mode); + } +} + +void LLGLSLShader::placeProfileQuery() +{ +#if !LL_DARWIN + if (mTimerQuery == 0) + { + glGenQueriesARB(1, &mTimerQuery); + } + + glBeginQueryARB(GL_SAMPLES_PASSED, 1); + glBeginQueryARB(GL_TIME_ELAPSED, mTimerQuery); +#endif +} + +void LLGLSLShader::readProfileQuery(U32 count, U32 mode) +{ +#if !LL_DARWIN + glEndQueryARB(GL_TIME_ELAPSED); + glEndQueryARB(GL_SAMPLES_PASSED); + + U64 time_elapsed = 0; + glGetQueryObjectui64v(mTimerQuery, GL_QUERY_RESULT, &time_elapsed); + + U64 samples_passed = 0; + glGetQueryObjectui64v(1, GL_QUERY_RESULT, &samples_passed); + + sTotalTimeElapsed += time_elapsed; + mTimeElapsed += time_elapsed; + + sTotalSamplesDrawn += samples_passed; + mSamplesDrawn += samples_passed; + + U32 tri_count = 0; + switch (mode) + { + case LLRender::TRIANGLES: tri_count = count/3; break; + case LLRender::TRIANGLE_FAN: tri_count = count-2; break; + case LLRender::TRIANGLE_STRIP: tri_count = count-2; break; + default: tri_count = count; break; //points lines etc just use primitive count + } + + mTrianglesDrawn += tri_count; + sTotalTrianglesDrawn += tri_count; + + sTotalDrawCalls++; + mDrawCalls++; +#endif +} + + + LLGLSLShader::LLGLSLShader() : mProgramObject(0), mAttributeMask(0), + mTotalUniformSize(0), mActiveTextureChannels(0), mShaderLevel(0), mShaderGroup(SG_DEFAULT), - mUniformsDirty(FALSE) + mUniformsDirty(FALSE), + mTimerQuery(0) { + +} +LLGLSLShader::~LLGLSLShader() +{ + } void LLGLSLShader::unload() { + sInstances.erase(this); + stop_glerror(); mAttribute.clear(); mTexture.clear(); @@ -138,6 +313,8 @@ BOOL LLGLSLShader::createShader(vector<string> * attributes, U32 varying_count, const char** varyings) { + sInstances.insert(this); + //reloading, reset matrix hash values for (U32 i = 0; i < LLRender::NUM_MATRIX_MODES; ++i) { @@ -333,11 +510,56 @@ void LLGLSLShader::mapUniform(GLint index, const vector<string> * uniforms) GLenum type; GLsizei length; - GLint size; + GLint size = -1; char name[1024]; /* Flawfinder: ignore */ name[0] = 0; + glGetActiveUniformARB(mProgramObject, index, 1024, &length, &size, &type, (GLcharARB *)name); +#if !LL_DARWIN + if (size > 0) + { + switch(type) + { + case GL_FLOAT_VEC2: size *= 2; break; + case GL_FLOAT_VEC3: size *= 3; break; + case GL_FLOAT_VEC4: size *= 4; break; + case GL_DOUBLE: size *= 2; break; + case GL_DOUBLE_VEC2: size *= 2; break; + case GL_DOUBLE_VEC3: size *= 6; break; + case GL_DOUBLE_VEC4: size *= 8; break; + case GL_INT_VEC2: size *= 2; break; + case GL_INT_VEC3: size *= 3; break; + case GL_INT_VEC4: size *= 4; break; + case GL_UNSIGNED_INT_VEC2: size *= 2; break; + case GL_UNSIGNED_INT_VEC3: size *= 3; break; + case GL_UNSIGNED_INT_VEC4: size *= 4; break; + case GL_BOOL_VEC2: size *= 2; break; + case GL_BOOL_VEC3: size *= 3; break; + case GL_BOOL_VEC4: size *= 4; break; + case GL_FLOAT_MAT2: size *= 4; break; + case GL_FLOAT_MAT3: size *= 9; break; + case GL_FLOAT_MAT4: size *= 16; break; + case GL_FLOAT_MAT2x3: size *= 6; break; + case GL_FLOAT_MAT2x4: size *= 8; break; + case GL_FLOAT_MAT3x2: size *= 6; break; + case GL_FLOAT_MAT3x4: size *= 12; break; + case GL_FLOAT_MAT4x2: size *= 8; break; + case GL_FLOAT_MAT4x3: size *= 12; break; + case GL_DOUBLE_MAT2: size *= 8; break; + case GL_DOUBLE_MAT3: size *= 18; break; + case GL_DOUBLE_MAT4: size *= 32; break; + case GL_DOUBLE_MAT2x3: size *= 12; break; + case GL_DOUBLE_MAT2x4: size *= 16; break; + case GL_DOUBLE_MAT3x2: size *= 12; break; + case GL_DOUBLE_MAT3x4: size *= 24; break; + case GL_DOUBLE_MAT4x2: size *= 16; break; + case GL_DOUBLE_MAT4x3: size *= 24; break; + } + mTotalUniformSize += size; + } +#endif + S32 location = glGetUniformLocationARB(mProgramObject, name); if (location != -1) { @@ -408,6 +630,7 @@ BOOL LLGLSLShader::mapUniforms(const vector<string> * uniforms) { BOOL res = TRUE; + mTotalUniformSize = 0; mActiveTextureChannels = 0; mUniform.clear(); mUniformMap.clear(); @@ -431,6 +654,7 @@ BOOL LLGLSLShader::mapUniforms(const vector<string> * uniforms) unbind(); + LL_DEBUGS("ShaderLoading") << "Total Uniform Size: " << mTotalUniformSize << llendl; return res; } diff --git a/indra/llrender/llglslshader.h b/indra/llrender/llglslshader.h index 725a7e2573..d67ece018f 100644 --- a/indra/llrender/llglslshader.h +++ b/indra/llrender/llglslshader.h @@ -67,14 +67,29 @@ public: SG_WATER }; + static std::set<LLGLSLShader*> sInstances; + static bool sProfileEnabled; + LLGLSLShader(); + ~LLGLSLShader(); static GLhandleARB sCurBoundShader; static LLGLSLShader* sCurBoundShaderPtr; static S32 sIndexedTextureChannels; static bool sNoFixedFunction; + static void initProfile(); + static void finishProfile(); + + static void startProfile(); + static void stopProfile(U32 count, U32 mode); + void unload(); + void clearStats(); + void dumpStats(); + void placeProfileQuery(); + void readProfileQuery(U32 count, U32 mode); + BOOL createShader(std::vector<std::string> * attributes, std::vector<std::string> * uniforms, U32 varying_count = 0, @@ -157,6 +172,7 @@ public: std::map<std::string, GLint> mUniformMap; //lookup map of uniform name to uniform location std::map<GLint, LLVector4> mValue; //lookup map of uniform location to last known value std::vector<GLint> mTexture; + S32 mTotalUniformSize; S32 mActiveTextureChannels; S32 mShaderLevel; S32 mShaderGroup; @@ -165,6 +181,17 @@ public: std::vector< std::pair< std::string, GLenum > > mShaderFiles; std::string mName; boost::unordered_map<std::string, std::string> mDefines; + + //statistcis for profiling shader performance + U32 mTimerQuery; + U64 mTimeElapsed; + static U64 sTotalTimeElapsed; + U32 mTrianglesDrawn; + static U32 sTotalTrianglesDrawn; + U64 mSamplesDrawn; + static U64 sTotalSamplesDrawn; + U32 mDrawCalls; + static U32 sTotalDrawCalls; }; //UI shader (declared here so llui_libtest will link properly) diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 29fe5400a3..6108c6f821 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -613,8 +613,9 @@ void LLVertexBuffer::drawArrays(U32 mode, const std::vector<LLVector3>& pos, con glVertexPointer(3, GL_FLOAT, 0, pos[0].mV); glNormalPointer(GL_FLOAT, 0, norm[0].mV); } - + LLGLSLShader::startProfile(); glDrawArrays(sGLMode[mode], 0, count); + LLGLSLShader::stopProfile(count, mode); } //static @@ -651,7 +652,9 @@ void LLVertexBuffer::drawElements(U32 mode, const LLVector4a* pos, const LLVecto glVertexPointer(3, GL_FLOAT, 16, pos); } + LLGLSLShader::startProfile(); glDrawElements(sGLMode[mode], num_indices, GL_UNSIGNED_SHORT, indicesp); + LLGLSLShader::stopProfile(num_indices, mode); } void LLVertexBuffer::validateRange(U32 start, U32 end, U32 count, U32 indices_offset) const @@ -751,9 +754,14 @@ void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indi U16* idx = ((U16*) getIndicesPointer())+indices_offset; stop_glerror(); + LLGLSLShader::startProfile(); glDrawRangeElements(sGLMode[mode], start, end, count, GL_UNSIGNED_SHORT, idx); + LLGLSLShader::stopProfile(count, mode); stop_glerror(); + + + placeFence(); } @@ -797,8 +805,10 @@ void LLVertexBuffer::draw(U32 mode, U32 count, U32 indices_offset) const } stop_glerror(); + LLGLSLShader::startProfile(); glDrawElements(sGLMode[mode], count, GL_UNSIGNED_SHORT, ((U16*) getIndicesPointer()) + indices_offset); + LLGLSLShader::stopProfile(count, mode); stop_glerror(); placeFence(); } @@ -838,7 +848,9 @@ void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const } stop_glerror(); + LLGLSLShader::startProfile(); glDrawArrays(sGLMode[mode], first, count); + LLGLSLShader::stopProfile(count, mode); stop_glerror(); placeFence(); } |