diff options
| author | Erik Kundiman <erik@megapahit.org> | 2024-06-30 07:37:29 +0800 | 
|---|---|---|
| committer | Erik Kundiman <erik@megapahit.org> | 2024-06-30 07:37:29 +0800 | 
| commit | 3c5f4d1273e02c9d247ff86d6cef20c37e587f19 (patch) | |
| tree | e78cdf529cd6cc3b7657482048f7e577eb6f1e47 /indra | |
| parent | 095bf053f148dac958158f6709303bf09fbcd719 (diff) | |
| parent | 2e16d1b365e465c0c3e505770e213200ec25fc12 (diff) | |
Merge remote-tracking branch 'mobserveur/main'
Diffstat (limited to 'indra')
| -rw-r--r-- | indra/llrender/llgl.cpp | 7 | ||||
| -rw-r--r-- | indra/llrender/llgl.h | 1 | ||||
| -rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 42 | ||||
| -rw-r--r-- | indra/newview/app_settings/settings.xml | 18 | ||||
| -rw-r--r-- | indra/newview/llagentcamera.cpp | 4 | ||||
| -rw-r--r-- | indra/newview/llappviewer.cpp | 37 | 
6 files changed, 64 insertions, 45 deletions
| diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp index 10920f1de3..9207e6ad73 100644 --- a/indra/llrender/llgl.cpp +++ b/indra/llrender/llgl.cpp @@ -1001,6 +1001,7 @@ LLGLManager::LLGLManager() :      mIsAMD(FALSE),      mIsNVIDIA(FALSE),      mIsIntel(FALSE), +    mIsApple(FALSE),  #if LL_DARWIN      mIsMobileGF(FALSE),  #endif @@ -1174,6 +1175,11 @@ bool LLGLManager::initGL()          mGLVendorShort = "INTEL";          mIsIntel = TRUE;      } +    else if(mGLVendor.find("APPLE") != std::string::npos) +    { +        mGLVendorShort = "APPLE"; +        mIsApple = TRUE; +    }      else      {          mGLVendorShort = "MISC"; @@ -1373,6 +1379,7 @@ void LLGLManager::asLLSD(LLSD& info)      info["is_ati"] = mIsAMD;  // note, do not rename is_ati to is_amd without coordinating with DW      info["is_nvidia"] = mIsNVIDIA;      info["is_intel"] = mIsIntel; +    info["is_apple"] = mIsApple;      info["gl_renderer"] = mGLRenderer;  } diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h index 5a7ad943df..e4b106c999 100644 --- a/indra/llrender/llgl.h +++ b/indra/llrender/llgl.h @@ -100,6 +100,7 @@ public:      BOOL mIsAMD;      BOOL mIsNVIDIA;      BOOL mIsIntel; +    BOOL mIsApple;  #if LL_DARWIN      // Needed to distinguish problem cards on older Macs that break with Materials diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 783794512c..52fb58187a 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -1155,19 +1155,40 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data)          LL_PROFILE_ZONE_NUM(end-start);          U32 size = end-start+1; +        U32 block_size = 65536; -        //Note (observeur): glBufferSubData() was causing synchronization stalls, specialy on Apple GPUs, possibly to the fact Apple GPU is a tiled gpu, resulting to heavy stutters, and spacialy when called several times per frame on the same buffer. +        //Note (observeur): The following code is executed on non Apple gpus. Using glMapBufferRange() didn't show obvious benefit on the other tested platforms (intel igpu, amd igpu and nVidia dgpus). +        if(!gGLManager.mIsApple) +        { +            for (U32 i = start; i <= end; i += block_size) +            { +                LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); +                LL_PROFILE_GPU_ZONE("glBufferSubData"); +                U32 tend = llmin(i + block_size, end); +                U32 size = tend - i + 1; +                glBufferSubData(target, i, size, (U8*) data + (i-start)); +            } + +            return; +        } + +        //Note (observeur): glBufferSubData() was causing synchronization stalls on Apple GPUs resulting to heavy stutters and lower performance in the world and UI rendering. Using glMapBufferRange() benefits Macs with Apple gpus enormously. -        //Note (observeur): I maintained the notion of block_size for testing purpose, but i think it's a bad idea. We don't know the overhead of glMapBufferRange() depending on the driver, so it's better avoiding calling it more than necessary.(0 -> loop is disabled, 8192 -> original value, 524288 -> a resonable value). -        constexpr U32 block_size = 0; +        //Note (observeur): Other bits such as GL_MAP_INVALIDATE_RANGE_BIT or GL_MAP_UNSYNCHRONIZED_BIT didn't seem to make much of a difference on Apple gpus, so we stick to the simple way. +        U32 MapBits = GL_MAP_WRITE_BIT; +        //Note (observeur): Using a block size of 0 will call the following block and map the buffer all in once. It doesn't bother Apple machines, it might actually benefit them a little bit. A larger value is also fine. The largest buffers I observed where around 2mb or 3mb while most of buffers are smaller than 50000 bytes. +        block_size = 524288; + +        //Note (observeur): This is called in case block_size is set to 0 (All in one mapping).          if(block_size == 0)          {              U8 * mptr = NULL;              LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");              LL_PROFILE_GPU_ZONE("glBufferSubData"); -            mptr = (U8*) glMapBufferRange( target, start, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); +            mptr = (U8*) glMapBufferRange( target, start, size, MapBits); +              if(mptr)              {                  std::memcpy(mptr, (U8*) data, size); @@ -1180,11 +1201,13 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data)              return;          } +        //Note (observeur): The following code is executed in case of block_size is superior to 0 +          //Note (observeur): This is for analysis purpose only -        if(size > block_size) -        { -            LL_INFOS() << "Large data range : " << size << LL_ENDL; -        } +        //if(size > block_size) +        //{ +        //    LL_INFOS() << "Large data range (MB MODE) : " << size << LL_ENDL; +        //}          U8 * mptr = NULL; @@ -1195,7 +1218,8 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data)              U32 tend = llmin(i + block_size, end);              size = tend - i + 1; -            mptr = (U8*) glMapBufferRange( target, i, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); +            mptr = (U8*) glMapBufferRange( target, i, size, MapBits ); +              if(mptr)              {                  std::memcpy(mptr, (U8*) data + (i-start), size); diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml index a07396b5b1..00c7eae9bc 100644 --- a/indra/newview/app_settings/settings.xml +++ b/indra/newview/app_settings/settings.xml @@ -2468,7 +2468,7 @@        <key>Value</key>        <integer>0</integer>      </map> -    <key>DoubleClickTeleport</key>  +    <key>DoubleClickTeleport</key>      <map>        <key>Comment</key>        <string>Enable double-click to teleport where allowed (afects minimap and people panel)</string> @@ -8898,7 +8898,7 @@        <key>Value</key>        <integer>1</integer>      </map> -   +    <key>RenderReflectionDetail</key>      <map>        <key>Comment</key> @@ -8965,7 +8965,7 @@      <key>Value</key>      <real>1</real>    </map> -   +    <key>RenderReflectionProbeDrawDistance</key>    <map>      <key>Comment</key> @@ -9153,7 +9153,7 @@      <key>Value</key>      <real>0.7</real>    </map> -   +    <key>RenderReflectionProbeMaxLocalLightAmbiance</key>    <map>      <key>Comment</key> @@ -10589,7 +10589,7 @@              <string>Boolean</string>          <key>Value</key>              <integer>0</integer> -    </map>  +    </map>      <key>NearbyListShowMap</key>      <map>        <key>Comment</key> @@ -13477,13 +13477,13 @@      <key>MaxFPS</key>      <map>        <key>Comment</key> -      <string>OBSOLETE UNUSED setting.</string> +      <string>FPS Limiter.</string>        <key>Persist</key>        <integer>1</integer>        <key>Type</key> -      <string>F32</string> +      <string>U32</string>        <key>Value</key> -      <real>-1.0</real> +      <real>0</real>      </map>      <key>ZoomDirect</key>      <map> @@ -15384,7 +15384,7 @@      <key>Type</key>      <string>Boolean</string>      <key>Value</key> -    <integer>1</integer>         +    <integer>1</integer>    </map>    <key>UpdateAppWindowTitleBar</key>    <map> diff --git a/indra/newview/llagentcamera.cpp b/indra/newview/llagentcamera.cpp index 1912d9d1d5..8e8d7e6c50 100644 --- a/indra/newview/llagentcamera.cpp +++ b/indra/newview/llagentcamera.cpp @@ -2663,9 +2663,9 @@ void LLAgentCamera::setCameraPosAndFocusGlobal(const LLVector3d& camera_pos, con      if (mCameraAnimating)      { -        const F64 ANIM_METERS_PER_SECOND = 10.0; +        const F64 ANIM_METERS_PER_SECOND = 15.0;          const F64 MIN_ANIM_SECONDS = 0.5; -        const F64 MAX_ANIM_SECONDS = 10.0; +        const F64 MAX_ANIM_SECONDS = 3.0;          F64 anim_duration = llmax( MIN_ANIM_SECONDS, sqrt(focus_delta_squared) / ANIM_METERS_PER_SECOND );          anim_duration = llmin( anim_duration, MAX_ANIM_SECONDS );          setAnimationDuration( (F32)anim_duration ); diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 2d65c725b5..0f1b5a789b 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -393,7 +393,6 @@ static std::string gLaunchFileOnQuit;  // Used on Win32 for other apps to identify our window (eg, win_setup)  const char* const VIEWER_WINDOW_CLASSNAME = "Second Life"; -U64 fpsLimitSleepUntil = 0; // fps limiter : time until to render the frame again  //---------------------------------------------------------------------------- @@ -1373,21 +1372,11 @@ bool LLAppViewer::frame()  bool LLAppViewer::doFrame()  { +    static LLCachedControl<U32> fpsLimitMaxFps(gSavedSettings, "MaxFPS", 0); -    // FPS Limit - -    U64 fpsLimitNow = LLTrace::BlockTimer::getCPUClockCount64(); -    U64 fpsLimitFrameStartTime = fpsLimitNow; -    if(fpsLimitSleepUntil > 0) -    { -        if(fpsLimitSleepUntil > fpsLimitNow) return 0; -    } -    else -    { -        fpsLimitSleepUntil = 0; -    } - - +    U64 fpsLimitSleepFor = 0; +    U64 fpsLimitFrameStartTime = 0; +    if(fpsLimitMaxFps > 0) fpsLimitFrameStartTime = LLTrace::BlockTimer::getCPUClockCount64();      LL_RECORD_BLOCK_TIME(FTM_FRAME);      { @@ -1559,24 +1548,17 @@ bool LLAppViewer::doFrame()              }          } -        // fps limiter - -        fpsLimitNow = LLTrace::BlockTimer::getCPUClockCount64(); -        U64 fpsLimitFrameTime = fpsLimitNow - fpsLimitFrameStartTime; -        static LLCachedControl<U32> fpsLimitMaxFps(gSavedSettings, "MaxFPS", 0); -          if(fpsLimitMaxFps > 0)          { +            U64 fpsLimitFrameTime = LLTrace::BlockTimer::getCPUClockCount64() - fpsLimitFrameStartTime;              U64 desired_time_ns = (U32)(1000000.f / fpsLimitMaxFps); -            if(fpsLimitFrameTime < desired_time_ns) +            if((fpsLimitFrameTime+1000) < desired_time_ns)              { -                U64 fpsLimitSleepUntil_for = desired_time_ns - fpsLimitFrameTime; -                fpsLimitSleepUntil = LLTrace::BlockTimer::getCPUClockCount64() + fpsLimitSleepUntil_for; +                fpsLimitSleepFor = (desired_time_ns - fpsLimitFrameTime - 1000) * 1.0;              }          } -          {              LL_PROFILE_ZONE_NAMED_CATEGORY_APP( "df pauseMainloopTimeout" )          pingMainloopTimeout("Main:Sleep"); @@ -1589,6 +1571,11 @@ bool LLAppViewer::doFrame()              //LL_RECORD_BLOCK_TIME(SLEEP2);              LL_PROFILE_ZONE_WARN( "Sleep2" ) +            if(fpsLimitSleepFor) +            { +                usleep(fpsLimitSleepFor); +            } +              // yield some time to the os based on command line option              static LLCachedControl<S32> yield_time(gSavedSettings, "YieldTime", -1);              if(yield_time >= 0) | 
