diff options
| author | RunitaiLinden <davep@lindenlab.com> | 2023-05-02 18:47:21 -0500 | 
|---|---|---|
| committer | RunitaiLinden <davep@lindenlab.com> | 2023-05-02 18:47:21 -0500 | 
| commit | e09475713b7abe6fcb916f4a770081a1696b57ab (patch) | |
| tree | 23ff7f10a9f710e237600df5bbc40b180ed71010 | |
| parent | 2994833e7cc53670bd3303cb88054d7acee875cf (diff) | |
DRTVWR-559 Optimization pass, make it so profileAvatar can read back GPU timer without a frame stall.
| -rw-r--r-- | indra/llcommon/llerror.cpp | 1 | ||||
| -rw-r--r-- | indra/llrender/llglslshader.cpp | 72 | ||||
| -rw-r--r-- | indra/llrender/llglslshader.h | 16 | ||||
| -rw-r--r-- | indra/llwindow/llwindowwin32.cpp | 12 | ||||
| -rw-r--r-- | indra/newview/lldrawable.cpp | 6 | ||||
| -rw-r--r-- | indra/newview/lldrawpoolalpha.cpp | 8 | ||||
| -rw-r--r-- | indra/newview/llmeshrepository.cpp | 1 | ||||
| -rw-r--r-- | indra/newview/llviewerobject.cpp | 1 | ||||
| -rw-r--r-- | indra/newview/llvoavatar.cpp | 64 | ||||
| -rw-r--r-- | indra/newview/llvoavatar.h | 26 | ||||
| -rw-r--r-- | indra/newview/llvoavatarself.cpp | 1 | ||||
| -rw-r--r-- | indra/newview/llvovolume.cpp | 34 | ||||
| -rw-r--r-- | indra/newview/pipeline.cpp | 71 | ||||
| -rw-r--r-- | indra/newview/pipeline.h | 9 | 
14 files changed, 179 insertions, 143 deletions
| diff --git a/indra/llcommon/llerror.cpp b/indra/llcommon/llerror.cpp index 56fb7c21ca..a7794cc045 100644 --- a/indra/llcommon/llerror.cpp +++ b/indra/llcommon/llerror.cpp @@ -1611,6 +1611,7 @@ namespace LLError  bool debugLoggingEnabled(const std::string& tag)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_APP;      LLMutexTrylock lock(getMutex<LOG_MUTEX>(), 5);      if (!lock.isLocked())      { diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp index 04ac2476a7..b7f08aa9af 100644 --- a/indra/llrender/llglslshader.cpp +++ b/indra/llrender/llglslshader.cpp @@ -243,9 +243,9 @@ void LLGLSLShader::stopProfile()      }  } -void LLGLSLShader::placeProfileQuery() +void LLGLSLShader::placeProfileQuery(bool for_runtime)  { -    if (sProfileEnabled) +    if (sProfileEnabled || for_runtime)      {          if (mTimerQuery == 0)          { @@ -254,42 +254,70 @@ void LLGLSLShader::placeProfileQuery()              glGenQueries(1, &mPrimitivesQuery);          } -        glBeginQuery(GL_SAMPLES_PASSED, mSamplesQuery);          glBeginQuery(GL_TIME_ELAPSED, mTimerQuery); -        glBeginQuery(GL_PRIMITIVES_GENERATED, mPrimitivesQuery); + +        if (!for_runtime) +        { +            glBeginQuery(GL_SAMPLES_PASSED, mSamplesQuery); +            glBeginQuery(GL_PRIMITIVES_GENERATED, mPrimitivesQuery); +        }      }  } -void LLGLSLShader::readProfileQuery() +bool LLGLSLShader::readProfileQuery(bool for_runtime, bool force_read)  { -    if (sProfileEnabled) +    if (sProfileEnabled || for_runtime)      { -        glEndQuery(GL_TIME_ELAPSED); -        glEndQuery(GL_SAMPLES_PASSED); -        glEndQuery(GL_PRIMITIVES_GENERATED); +        if (!mProfilePending) +        { +            glEndQuery(GL_TIME_ELAPSED); +            if (!for_runtime) +            { +                glEndQuery(GL_SAMPLES_PASSED); +                glEndQuery(GL_PRIMITIVES_GENERATED); +            } +            mProfilePending = for_runtime; +        } + +        if (mProfilePending && for_runtime && !force_read) +        { +            GLuint64 result = 0; +            glGetQueryObjectui64v(mTimerQuery, GL_QUERY_RESULT_AVAILABLE, &result); + +            if (result != GL_TRUE) +            { +                return false; +            } +        }          GLuint64 time_elapsed = 0;          glGetQueryObjectui64v(mTimerQuery, GL_QUERY_RESULT, &time_elapsed); +        mTimeElapsed += time_elapsed; +        mProfilePending = false; -        GLuint64 samples_passed = 0; -        glGetQueryObjectui64v(mSamplesQuery, GL_QUERY_RESULT, &samples_passed); +        if (!for_runtime) +        { +            GLuint64 samples_passed = 0; +            glGetQueryObjectui64v(mSamplesQuery, GL_QUERY_RESULT, &samples_passed); -        U64 primitives_generated = 0; -        glGetQueryObjectui64v(mPrimitivesQuery, GL_QUERY_RESULT, &primitives_generated); -        sTotalTimeElapsed += time_elapsed; -        mTimeElapsed += time_elapsed; +            U64 primitives_generated = 0; +            glGetQueryObjectui64v(mPrimitivesQuery, GL_QUERY_RESULT, &primitives_generated); +            sTotalTimeElapsed += time_elapsed; -        sTotalSamplesDrawn += samples_passed; -        mSamplesDrawn += samples_passed; +            sTotalSamplesDrawn += samples_passed; +            mSamplesDrawn += samples_passed; -        U32 tri_count = (U32)primitives_generated / 3; +            U32 tri_count = (U32)primitives_generated / 3; -        mTrianglesDrawn += tri_count; -        sTotalTrianglesDrawn += tri_count; +            mTrianglesDrawn += tri_count; +            sTotalTrianglesDrawn += tri_count; -        sTotalBinds++; -        mBinds++; +            sTotalBinds++; +            mBinds++; +        }      } + +    return true;  } diff --git a/indra/llrender/llglslshader.h b/indra/llrender/llglslshader.h index 9d187c972c..3e7dae6669 100644 --- a/indra/llrender/llglslshader.h +++ b/indra/llrender/llglslshader.h @@ -168,8 +168,16 @@ public:      void unload();      void clearStats();      void dumpStats(); -    void placeProfileQuery(); -    void readProfileQuery(); + +    // place query objects for profiling if profiling is enabled +    // if for_runtime is true, will place timer query only whether or not profiling is enabled +    void placeProfileQuery(bool for_runtime = false); + +    // Readback query objects if profiling is enabled +    // If for_runtime is true, will readback timer query iff query is available +    // Will return false if a query is pending (try again later) +    // If force_read is true, will force an immediate readback (severe performance penalty) +    bool readProfileQuery(bool for_runtime = false, bool force_read = false);      BOOL createShader(std::vector<LLStaticHashedString>* attributes,          std::vector<LLStaticHashedString>* uniforms, @@ -292,6 +300,7 @@ public:      defines_map_t mDefines;      //statistics for profiling shader performance +    bool mProfilePending = false;      U32 mTimerQuery;      U32 mSamplesQuery;      U32 mPrimitivesQuery; @@ -308,6 +317,9 @@ public:      // this pointer should be set to whichever shader represents this shader's rigged variant      LLGLSLShader* mRiggedVariant = nullptr; +    // hacky flag used for optimization in LLDrawPoolAlpha +    bool mCanBindFast = false; +  #ifdef LL_PROFILER_ENABLE_RENDER_DOC      void setLabel(const char* label);  #endif diff --git a/indra/llwindow/llwindowwin32.cpp b/indra/llwindow/llwindowwin32.cpp index 651c04f32c..de2188efb2 100644 --- a/indra/llwindow/llwindowwin32.cpp +++ b/indra/llwindow/llwindowwin32.cpp @@ -4898,8 +4898,6 @@ void LLWindowWin32::LLWindowWin32Thread::updateVRAMUsage()          { // current usage is sometimes unreliable on Intel GPUs, fall back to estimated usage              cu_mb = llmax((U32)1, eu_mb);          } -        F32 eu_error = (F32)((S32)eu_mb - (S32)cu_mb) / (F32)cu_mb; -          U32 target_mb = budget_mb;          if (target_mb > 4096)  // if 4GB are installed, try to leave 2GB free  @@ -4913,6 +4911,9 @@ void LLWindowWin32::LLWindowWin32Thread::updateVRAMUsage()          mAvailableVRAM = cu_mb < target_mb ? target_mb - cu_mb : 0; +#if 0 +         +        F32 eu_error = (F32)((S32)eu_mb - (S32)cu_mb) / (F32)cu_mb;          LL_INFOS("Window") << "\nLocal\nAFR: " << info.AvailableForReservation / 1024 / 1024              << "\nBudget: " << info.Budget / 1024 / 1024              << "\nCR: " << info.CurrentReservation / 1024 / 1024 @@ -4920,12 +4921,7 @@ void LLWindowWin32::LLWindowWin32Thread::updateVRAMUsage()              << "\nEU: " << eu_mb << llformat(" (%.2f)", eu_error)              << "\nTU: " << target_mb              << "\nAM: " << mAvailableVRAM << LL_ENDL; - -        /*mDXGIAdapter->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info); -        LL_INFOS("Window") << "\nNon-Local\nAFR: " << info.AvailableForReservation / 1024 / 1024 -            << "\nBudget: " << info.Budget / 1024 / 1024 -            << "\nCR: " << info.CurrentReservation / 1024 / 1024 -            << "\nCU: " << info.CurrentUsage / 1024 / 1024 << LL_ENDL;*/ +#endif      }      else if (mD3DDevice != NULL)      { // fallback to D3D9 diff --git a/indra/newview/lldrawable.cpp b/indra/newview/lldrawable.cpp index 04b6ebd14c..ea59a413fa 100644 --- a/indra/newview/lldrawable.cpp +++ b/indra/newview/lldrawable.cpp @@ -908,12 +908,6 @@ void LLDrawable::updateDistance(LLCamera& camera, bool force_update)                  LLVector3 cam_pos_from_agent = LLViewerCamera::getInstance()->getOrigin();                  LLVector3 cam_to_box_offset = point_to_box_offset(cam_pos_from_agent, av_box);                  mDistanceWRTCamera = llmax(0.01f, ll_round(cam_to_box_offset.magVec(), 0.01f)); -                LL_DEBUGS("DynamicBox") << volume->getAvatar()->getFullname()  -                                        << " pos (ignored) " << pos -                                        << " cam pos " << cam_pos_from_agent -                                        << " box " << av_box[0] << "," << av_box[1]  -                                        << " -> dist " << mDistanceWRTCamera -                                        << LL_ENDL;                  mVObjp->updateLOD();                  return;              } diff --git a/indra/newview/lldrawpoolalpha.cpp b/indra/newview/lldrawpoolalpha.cpp index 404039189b..ec57e20d35 100644 --- a/indra/newview/lldrawpoolalpha.cpp +++ b/indra/newview/lldrawpoolalpha.cpp @@ -107,12 +107,10 @@ static void prepare_alpha_shader(LLGLSLShader* shader, bool textureGamma, bool d      // i.e. shaders\class1\deferred\alphaF.glsl      if (deferredEnvironment)      { -        gPipeline.bindDeferredShader( *shader ); -    } -    else -    { -        shader->bind(); +        shader->mCanBindFast = false;      } +     +    shader->bind();      shader->uniform1f(LLShaderMgr::DISPLAY_GAMMA, (gamma > 0.1f) ? 1.0f / gamma : (1.0f / 2.2f));      if (LLPipeline::sRenderingHUDs) diff --git a/indra/newview/llmeshrepository.cpp b/indra/newview/llmeshrepository.cpp index e8411dd573..d301e14e10 100644 --- a/indra/newview/llmeshrepository.cpp +++ b/indra/newview/llmeshrepository.cpp @@ -4548,6 +4548,7 @@ LLMeshCostData::LLMeshCostData()  bool LLMeshCostData::init(const LLSD& header)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME;      mSizeByLOD.resize(4);      mEstTrisByLOD.resize(4); diff --git a/indra/newview/llviewerobject.cpp b/indra/newview/llviewerobject.cpp index 2f4274d0d0..8334ca329a 100644 --- a/indra/newview/llviewerobject.cpp +++ b/indra/newview/llviewerobject.cpp @@ -3965,6 +3965,7 @@ U32 LLViewerObject::recursiveGetTriangleCount(S32* vcount) const  // prim's scale. Should revisit at some point.  F32 LLViewerObject::recursiveGetScaledSurfaceArea() const  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME;      F32 area = 0.f;      const LLDrawable* drawable = mDrawable;      if (drawable) diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp index e38a6457f4..6a0ca74e7c 100644 --- a/indra/newview/llvoavatar.cpp +++ b/indra/newview/llvoavatar.cpp @@ -2697,6 +2697,10 @@ void LLVOAvatar::idleUpdate(LLAgent &agent, const F64 &time)      if ((LLFrameTimer::getFrameCount() + mID.mData[0]) % compl_upd_freq == 0)      { +        // DEPRECATED  +        // replace with LLPipeline::profileAvatar? +        // Avatar profile takes ~ 0.5ms while idleUpdateRenderComplexity takes ~5ms +        // (both are unacceptably costly)          idleUpdateRenderComplexity();      }      idleUpdateDebugInfo(); @@ -10841,6 +10845,7 @@ void LLVOAvatar::accountRenderComplexityForObject(      hud_complexity_list_t& hud_complexity_list,      object_complexity_list_t& object_complexity_list)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;      if (attached_object && !attached_object->isHUDAttachment())  		{          mAttachmentVisibleTriangleCount += attached_object->recursiveGetTriangleCount(); @@ -10984,23 +10989,24 @@ void LLVOAvatar::accountRenderComplexityForObject(  // Calculations for mVisualComplexity value  void LLVOAvatar::calculateUpdateRenderComplexity()  { -    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;      /*****************************************************************       * This calculation should not be modified by third party viewers,       * since it is used to limit rendering and should be uniform for       * everyone. If you have suggested improvements, submit them to       * the official viewer for consideration.       *****************************************************************/ -	static const U32 COMPLEXITY_BODY_PART_COST = 200; -	static LLCachedControl<F32> max_complexity_setting(gSavedSettings,"MaxAttachmentComplexity"); -	F32 max_attachment_complexity = max_complexity_setting; -	max_attachment_complexity = llmax(max_attachment_complexity, DEFAULT_MAX_ATTACHMENT_COMPLEXITY); - -	// Diagnostic list of all textures on our avatar -	static std::set<LLUUID> all_textures; -      if (mVisualComplexityStale)  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR; + +        static const U32 COMPLEXITY_BODY_PART_COST = 200; +        static LLCachedControl<F32> max_complexity_setting(gSavedSettings, "MaxAttachmentComplexity"); +        F32 max_attachment_complexity = max_complexity_setting; +        max_attachment_complexity = llmax(max_attachment_complexity, DEFAULT_MAX_ATTACHMENT_COMPLEXITY); + +        // Diagnostic list of all textures on our avatar +        static std::set<LLUUID> all_textures; +  		U32 cost = VISUAL_COMPLEXITY_UNKNOWN;  		LLVOVolume::texture_cost_t textures;  		hud_complexity_list_t hud_complexity_list; @@ -11467,3 +11473,43 @@ BOOL LLVOAvatar::isTextureVisible(LLAvatarAppearanceDefines::ETextureIndex type,  	// non-self avatars don't have wearables  	return FALSE;  } + +void LLVOAvatar::placeProfileQuery() +{ +    if (mGPUTimerQuery == 0) +    { +        glGenQueries(1, &mGPUTimerQuery); +    } + +    glBeginQuery(GL_TIME_ELAPSED, mGPUTimerQuery); +} + +void LLVOAvatar::readProfileQuery(S32 retries) +{ +    if (!mGPUProfilePending) +    { +        glEndQuery(GL_TIME_ELAPSED); +        mGPUProfilePending = true; +    } + +    GLuint64 result = 0; +    glGetQueryObjectui64v(mGPUTimerQuery, GL_QUERY_RESULT_AVAILABLE, &result); + +    if (result == GL_TRUE || --retries <= 0) +    { // query available, readback result +        GLuint64 time_elapsed = 0; +        glGetQueryObjectui64v(mGPUTimerQuery, GL_QUERY_RESULT, &time_elapsed); +        mGPURenderTime = time_elapsed / 1000000.f; +        mGPUProfilePending = false; +    } +    else +    { // wait until next frame +        LLUUID id = getID(); + +        LL::WorkQueue::getInstance("mainloop")->post([id, retries] { +            LLVOAvatar* avatar = (LLVOAvatar*) gObjectList.findObject(id); +            avatar->readProfileQuery(retries); +            }); +    } +} + diff --git a/indra/newview/llvoavatar.h b/indra/newview/llvoavatar.h index 0bb19c4420..2ca44b041a 100644 --- a/indra/newview/llvoavatar.h +++ b/indra/newview/llvoavatar.h @@ -305,6 +305,9 @@ public:  	static const U32 VISUAL_COMPLEXITY_UNKNOWN;  	void			updateVisualComplexity(); +    void placeProfileQuery(); +    void readProfileQuery(S32 retries); +      // get the GPU time in ms of rendering this avatar including all attachments      // returns -1 if this avatar has not been profiled using gPipeline.profileAvatar      F32             getGPURenderTime() { return mGPURenderTime; } @@ -313,18 +316,11 @@ public:      // return -1 if this avatar has not been profiled using gPipeline.mProfileAvatar      F32             getCPURenderTime() { return mCPURenderTime; } -    // get the number of samples passed during the avatar profile -    // return -1 if this avatar has not been profiled using gPipeline.mProfileAvatar -    S32             getGPUSamplesPassed() { return mGPUSamplesPassed; } - -    // get the number of triangles rendered during the avatar profile -    // return -1 if this avatar has not been profiled using gPipeline.mProfileAvatar -    S32             getGPUTrianglesRendered() { return mGPUTrianglesRendered; } - -    // DEPRECATED -- obsolete avatar render cost +     +    // avatar render cost  	U32				getVisualComplexity()			{ return mVisualComplexity;				}; -    // DEPRECATED -- obsolete surface area calculation +    // surface area calculation  	F32				getAttachmentSurfaceArea()		{ return mAttachmentSurfaceArea;		};  	U32				getReportedVisualComplexity()					{ return mReportedVisualComplexity;				};	// Numbers as reported by the SL server @@ -555,20 +551,18 @@ private:  	S32	 		mUpdatePeriod;  	S32  		mNumInitFaces; //number of faces generated when creating the avatar drawable, does not inculde splitted faces due to long vertex buffer. +    // profile handle +    U32 mGPUTimerQuery = 0; +      // profile results      // GPU render time in ms      F32 mGPURenderTime = -1.f; +    bool mGPUProfilePending = false;      // CPU render time in ms      F32 mCPURenderTime = -1.f; -    // number of samples passed according to GPU -    S32 mGPUSamplesPassed = -1; - -    // number of triangles rendered according to GPU -    S32 mGPUTrianglesRendered = -1; -  	// the isTooComplex method uses these mutable values to avoid recalculating too frequently      // DEPRECATED -- obsolete avatar render cost values  	mutable U32  mVisualComplexity; diff --git a/indra/newview/llvoavatarself.cpp b/indra/newview/llvoavatarself.cpp index d8b82d3114..82dfb1ca2a 100644 --- a/indra/newview/llvoavatarself.cpp +++ b/indra/newview/llvoavatarself.cpp @@ -1155,6 +1155,7 @@ LLViewerObject* LLVOAvatarSelf::getWornAttachment(const LLUUID& inv_item_id)  bool LLVOAvatarSelf::getAttachedPointName(const LLUUID& inv_item_id, std::string& name) const  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;  	if (!gInventory.getItem(inv_item_id))  	{  		name = "ATTACHMENT_MISSING_ITEM"; diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index da5a505d3b..585c98bace 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -1603,7 +1603,9 @@ BOOL LLVOVolume::updateLOD()  	{  		return FALSE;  	} -	 + +    LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME; +  	BOOL lod_changed = FALSE;  	if (!LLSculptIDSize::instance().isUnloaded(getVolume()->getParams().getSculptID()))  @@ -1617,16 +1619,6 @@ BOOL LLVOVolume::updateLOD()  	if (lod_changed)  	{ -        if (debugLoggingEnabled("AnimatedObjectsLinkset")) -        { -            if (isAnimatedObject() && isRiggedMesh()) -            { -                std::string vobj_name = llformat("Vol%p", this); -                F32 est_tris = getEstTrianglesMax(); -                LL_DEBUGS("AnimatedObjectsLinkset") << vobj_name << " updateLOD to " << getLOD() << ", tris " << est_tris << LL_ENDL;  -            } -        } -  		gPipeline.markRebuild(mDrawable, LLDrawable::REBUILD_VOLUME, FALSE);  		mLODChanged = TRUE;  	} @@ -3938,6 +3930,7 @@ const LLMatrix4 LLVOVolume::getRenderMatrix() const  // children, and cost should only be increased for unique textures  -Nyx  U32 LLVOVolume::getRenderCost(texture_cost_t &textures) const  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME;      /*****************************************************************       * This calculation should not be modified by third party viewers,       * since it is used to limit rendering and should be uniform for @@ -4532,16 +4525,6 @@ const LLMatrix4& LLVOVolume::getWorldMatrix(LLXformMatrix* xform) const  void LLVOVolume::markForUpdate(BOOL priority)  {  -    if (debugLoggingEnabled("AnimatedObjectsLinkset")) -    { -        if (isAnimatedObject() && isRiggedMesh()) -        { -            std::string vobj_name = llformat("Vol%p", this); -            F32 est_tris = getEstTrianglesMax(); -            LL_DEBUGS("AnimatedObjectsLinkset") << vobj_name << " markForUpdate, tris " << est_tris << LL_ENDL;  -        } -    } -      if (mDrawable)      {          shrinkWrap(); @@ -6098,15 +6081,6 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)  					if (!vobj) continue; -					if (debugLoggingEnabled("AnimatedObjectsLinkset")) -					{ -						if (vobj->isAnimatedObject() && vobj->isRiggedMesh()) -						{ -							std::string vobj_name = llformat("Vol%p", vobj); -							F32 est_tris = vobj->getEstTrianglesMax(); -							LL_DEBUGS("AnimatedObjectsLinkset") << vobj_name << " rebuildMesh, tris " << est_tris << LL_ENDL; -						} -					}  					if (vobj->isNoLOD()) continue;  					vobj->preRebuild(); diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index df8b8a552a..4d9a8a594a 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -3016,18 +3016,6 @@ void LLPipeline::markRebuild(LLDrawable *drawablep, LLDrawable::EDrawableFlags f  {  	if (drawablep && !drawablep->isDead() && assertInitialized())  	{ -        if (debugLoggingEnabled("AnimatedObjectsLinkset")) -        { -            LLVOVolume *vol_obj = drawablep->getVOVolume(); -            if (vol_obj && vol_obj->isAnimatedObject() && vol_obj->isRiggedMesh()) -            { -                std::string vobj_name = llformat("Vol%p", vol_obj); -                F32 est_tris = vol_obj->getEstTrianglesMax(); -                LL_DEBUGS("AnimatedObjectsLinkset") << vobj_name << " markRebuild, tris " << est_tris  -                                                    << " priority " << (S32) priority << " flag " << std::hex << flag << LL_ENDL;  -            } -        } -      		if (!drawablep->isState(LLDrawable::BUILT))  		{  			priority = true; @@ -7697,10 +7685,18 @@ void LLPipeline::bindShadowMaps(LLGLSLShader& shader)  void LLPipeline::bindDeferredShaderFast(LLGLSLShader& shader)  { -    shader.bind(); -    bindLightFunc(shader); -    bindShadowMaps(shader); -    bindReflectionProbes(shader); +    if (shader.mCanBindFast) +    { // was previously fully bound, use fast path +        shader.bind(); +        bindLightFunc(shader); +        bindShadowMaps(shader); +        bindReflectionProbes(shader); +    } +    else +    { //wasn't previously bound, use slow path +        bindDeferredShader(shader); +        shader.mCanBindFast = true; +    }  }  void LLPipeline::bindDeferredShader(LLGLSLShader& shader, LLRenderTarget* light_target) @@ -10068,30 +10064,24 @@ void LLPipeline::renderRiggedGroups(LLRenderPass* pass, U32 type, bool texture)      }  } -static LLTrace::BlockTimerStatHandle FTM_GENERATE_IMPOSTOR("Generate Impostor"); -  void LLPipeline::profileAvatar(LLVOAvatar* avatar, bool profile_attachments)  {      if (gGLManager.mGLVersion < 3.25f)      { // profiling requires GL 3.3 or later          return;      } + +    LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE; +      LLGLSLShader* cur_shader = LLGLSLShader::sCurBoundShaderPtr;      mRT->deferredScreen.bindTarget();      mRT->deferredScreen.clear(); -    bool profile_enabled = LLGLSLShader::sProfileEnabled; -    LLGLSLShader::sProfileEnabled = true; -      if (!profile_attachments)      { -        // profile entire avatar all at once -         -        // use gDebugProgram as a proxy for getting profile results -        gDebugProgram.clearStats(); -        gDebugProgram.placeProfileQuery(); -        LLGLSLShader::sProfileEnabled = false; +        // profile entire avatar all at once and readback asynchronously +        avatar->placeProfileQuery();          LLTimer cpu_timer; @@ -10099,13 +10089,7 @@ void LLPipeline::profileAvatar(LLVOAvatar* avatar, bool profile_attachments)          avatar->mCPURenderTime = (F32)cpu_timer.getElapsedTimeF32() * 1000.f; -        LLGLSLShader::sProfileEnabled = true; -        gDebugProgram.readProfileQuery(); - -        avatar->mGPURenderTime = gDebugProgram.mTimeElapsed / 1000000.f; - -        avatar->mGPUSamplesPassed = gDebugProgram.mSamplesDrawn; -        avatar->mGPUTrianglesRendered = gDebugProgram.mTrianglesDrawn; +        avatar->readProfileQuery(5); // allow up to 5 frames of latency      }      else       {  @@ -10126,23 +10110,19 @@ void LLPipeline::profileAvatar(LLVOAvatar* avatar, bool profile_attachments)                  LLViewerObject* attached_object = attachment_iter->get();                  if (attached_object)                  { +                    // use gDebugProgram to do the GPU queries                      gDebugProgram.clearStats(); -                    gDebugProgram.placeProfileQuery(); -                    LLGLSLShader::sProfileEnabled = false; +                    gDebugProgram.placeProfileQuery(true);                      generateImpostor(avatar, false, true, attached_object); -                    LLGLSLShader::sProfileEnabled = true; -                    gDebugProgram.readProfileQuery(); +                    gDebugProgram.readProfileQuery(true, true);                      attached_object->mGPURenderTime = gDebugProgram.mTimeElapsed / 1000000.f; - -                    // TODO: maybe also record triangles and samples                  }              }          }      } -    LLGLSLShader::sProfileEnabled = profile_enabled;      mRT->deferredScreen.flush();      if (cur_shader) @@ -10153,7 +10133,7 @@ void LLPipeline::profileAvatar(LLVOAvatar* avatar, bool profile_attachments)  void LLPipeline::generateImpostor(LLVOAvatar* avatar, bool preview_avatar, bool for_profile, LLViewerObject* specific_attachment)  { -    LL_RECORD_BLOCK_TIME(FTM_GENERATE_IMPOSTOR); +    LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE;      LL_PROFILE_GPU_ZONE("generateImpostor");  	LLGLState::checkStates(); @@ -10201,6 +10181,7 @@ void LLPipeline::generateImpostor(LLVOAvatar* avatar, bool preview_avatar, bool              RENDER_TYPE_TREE,              RENDER_TYPE_VOIDWATER,              RENDER_TYPE_WATER, +            RENDER_TYPE_ALPHA_POST_WATER,              RENDER_TYPE_PASS_GRASS,              RENDER_TYPE_HUD,              RENDER_TYPE_PARTICLES, @@ -10392,9 +10373,9 @@ void LLPipeline::generateImpostor(LLVOAvatar* avatar, bool preview_avatar, bool  		LLDrawPoolAvatar::sMinimumAlpha = 0.f;  	} -    if (preview_avatar) +    if (preview_avatar || for_profile)      { -        // previews don't care about imposters +        // previews and profiles don't care about imposters          renderGeomDeferred(camera);          renderGeomPostDeferred(camera);      } @@ -10502,7 +10483,7 @@ void LLPipeline::generateImpostor(LLVOAvatar* avatar, bool preview_avatar, bool  	gGL.matrixMode(LLRender::MM_MODELVIEW);  	gGL.popMatrix(); -    if (!preview_avatar) +    if (!preview_avatar && !for_profile)      {          avatar->mNeedsImpostorUpdate = FALSE;          avatar->cacheImpostorValues(); diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h index 45842a3948..8a0a9a7172 100644 --- a/indra/newview/pipeline.h +++ b/indra/newview/pipeline.h @@ -132,8 +132,17 @@ public:      bool allocateShadowBuffer(U32 resX, U32 resY);  	void resetVertexBuffers(LLDrawable* drawable); + +    // perform a profile of the given avatar +    // if profile_attachments is true, run a profile for each attachment      void profileAvatar(LLVOAvatar* avatar, bool profile_attachments = false); + +    // generate an impostor for the given avatar +    //  preview_avatar - if true, a preview window render is being performed +    //  for_profile - if true, a profile is being performed, do not update actual impostor +    //  specific_attachment - specific attachment to profile, or nullptr to profile entire avatar  	void generateImpostor(LLVOAvatar* avatar, bool preview_avatar = false, bool for_profile = false, LLViewerObject* specific_attachment = nullptr); +  	void bindScreenToTexture();  	void renderFinalize();  	void copyScreenSpaceReflections(LLRenderTarget* src, LLRenderTarget* dst); | 
