1 files changed, 66 insertions, 67 deletions
diff --git a/indra/newview/llglsandbox.cpp b/indra/newview/llglsandbox.cpp
index b50e1d6d3c..1b22b903fc 100644
--- a/indra/newview/llglsandbox.cpp
+++ b/indra/newview/llglsandbox.cpp
@@ -821,7 +821,7 @@ void LLViewerObjectList::renderObjectBeacons()
 			const LLVector3 &thisline = debug_beacon.mPositionAgent;
 		
 			gGL.begin(LLRender::LINES);
-			gGL.color4fv(color.mV);
+			gGL.color4fv(linearColor4(color).mV);
 			draw_cross_lines(thisline, 2.0f, 2.0f, 50.f);
 			draw_line_cube(0.10f, thisline);
 			
@@ -850,7 +850,7 @@ void LLViewerObjectList::renderObjectBeacons()
 
 			const LLVector3 &thisline = debug_beacon.mPositionAgent;
 			gGL.begin(LLRender::LINES);
-			gGL.color4fv(debug_beacon.mColor.mV);
+			gGL.color4fv(linearColor4(debug_beacon.mColor).mV);
 			draw_cross_lines(thisline, 0.5f, 0.5f, 0.5f);
 			draw_line_cube(0.10f, thisline);
 
@@ -994,9 +994,8 @@ private:
 //-----------------------------------------------------------------------------
 F32 gpu_benchmark()
 {
-	if (!gGLManager.mHasTimerQuery)
+	if (gGLManager.mGLVersion < 3.3f)
 	{ // don't bother benchmarking venerable drivers which don't support accurate timing anyway
-      // and are likely to be correctly identified by the GPU table already.
 		return -1.f;
 	}
 
@@ -1036,8 +1035,6 @@ F32 gpu_benchmark()
 	//time limit, allocation operations shouldn't take longer then 30 seconds, same for actual benchmark.
 	const F32 time_limit = 30;
 
-	ShaderProfileHelper initProfile;
-	
 	std::vector<LLRenderTarget> dest(count);
 	TextureHolder texHolder(0, count);
 	std::vector<F32> results;
@@ -1058,7 +1055,7 @@ F32 gpu_benchmark()
 	for (U32 i = 0; i < count; ++i)
 	{
 		//allocate render targets and textures
-		if (!dest[i].allocate(res, res, GL_RGBA, false, false, LLTexUnit::TT_TEXTURE, true))
+		if (!dest[i].allocate(res, res, GL_RGBA))
 		{
 			LL_WARNS("Benchmark") << "Failed to allocate render target." << LL_ENDL;
 			// abandon the benchmark test
@@ -1091,9 +1088,9 @@ F32 gpu_benchmark()
     delete [] pixels;
 
 	//make a dummy triangle to draw with
-	LLPointer<LLVertexBuffer> buff = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX, GL_STREAM_DRAW);
+	LLPointer<LLVertexBuffer> buff = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX);
 
-	if (!buff->allocateBuffer(3, 0, true))
+	if (!buff->allocateBuffer(3, 0))
 	{
 		LL_WARNS("Benchmark") << "Failed to allocate buffer during benchmark." << LL_ENDL;
 		// abandon the benchmark test
@@ -1117,67 +1114,51 @@ F32 gpu_benchmark()
 	v[1].set(-1, -3, 0);
 	v[2].set(3, 1, 0);
 
-	buff->flush();
-
-	// ensure matched pair of bind() and unbind() calls
-	ShaderBinder binder(gBenchmarkProgram);
-
-//#ifdef GL_ARB_vertex_array_object
-    U32 glarray = 0;
-
-    if (LLRender::sGLCoreProfile)
-    {
-        glGenVertexArrays(1, &glarray);
-        glBindVertexArray(glarray);
+	buff->unmapBuffer();
+
+    LLGLSLShader::unbind();
+
+    F32 time_passed = 0; // seconds
+
+    { //run CPU timer benchmark
+        glFinish();
+        gBenchmarkProgram.bind();
+        for (S32 c = -1; c < samples && time_passed < time_limit; ++c)
+        {
+            LLTimer timer;
+            timer.start();
+
+            for (U32 i = 0; i < count; ++i)
+            {
+                dest[i].bindTarget();
+                texHolder.bind(i);
+                buff->setBuffer();
+                buff->drawArrays(LLRender::TRIANGLES, 0, 3);
+                dest[i].flush();
+            }
+
+            //wait for current batch of copies to finish
+            glFinish();
+
+            F32 time = timer.getElapsedTimeF32();
+            time_passed += time;
+
+            if (c >= 0) // <-- ignore the first sample as it tends to be artificially slow
+            {
+                //store result in gigabytes per second
+                F32 gb = (F32)((F64)(res * res * 8 * count)) / (1000000000);
+                F32 gbps = gb / time;
+                results.push_back(gbps);
+            }
+        }
+        gBenchmarkProgram.unbind();
     }
-//#endif
-
-	buff->setBuffer(LLVertexBuffer::MAP_VERTEX);
-	glFinish();
-
-	F32 time_passed = 0; // seconds
-	for (S32 c = -1; c < samples && time_passed < time_limit; ++c)
-	{
-		LLTimer timer;
-		timer.start();
-
-		for (U32 i = 0; i < count; ++i)
-		{
-			dest[i].bindTarget();
-			texHolder.bind(i);
-			buff->drawArrays(LLRender::TRIANGLES, 0, 3);
-			dest[i].flush();
-		}
-
-		//wait for current batch of copies to finish
-		glFinish();
-
-		F32 time = timer.getElapsedTimeF32();
-		time_passed += time;
-
-		if (c >= 0) // <-- ignore the first sample as it tends to be artificially slow
-		{ 
-			//store result in gigabytes per second
-			F32 gb = (F32) ((F64) (res*res*8*count))/(1000000000);
-			F32 gbps = gb/time;
-			results.push_back(gbps);
-		}
-	}
-
-//#ifdef GL_ARB_vertex_array_object
-    if (LLRender::sGLCoreProfile)
-    {
-        glBindVertexArray(0);
-        glDeleteVertexArrays(1, &glarray);
-    }
-//#endif
-
 
 	std::sort(results.begin(), results.end());
 
 	F32 gbps = results[results.size()/2];
 
-	LL_INFOS("Benchmark") << "Memory bandwidth is " << llformat("%.3f", gbps) << "GB/sec according to CPU timers, " << (F32)results.size() << " tests took " << time_passed << " seconds" << LL_ENDL;
+	LL_INFOS("Benchmark") << "Memory bandwidth is " << llformat("%.3f", gbps) << " GB/sec according to CPU timers, " << (F32)results.size() << " tests took " << time_passed << " seconds" << LL_ENDL;
   
 #if LL_DARWIN
     if (gbps > 512.f)
@@ -1188,14 +1169,32 @@ F32 gpu_benchmark()
     }
 #endif
 
+    // run GPU timer benchmark
+    { 
+        ShaderProfileHelper initProfile;
+        dest[0].bindTarget();
+        gBenchmarkProgram.bind();
+        for (S32 c = 0; c < samples; ++c)
+        {
+            for (U32 i = 0; i < count; ++i)
+            {
+                texHolder.bind(i);
+                buff->setBuffer();
+                buff->drawArrays(LLRender::TRIANGLES, 0, 3);
+            }
+        }
+        gBenchmarkProgram.unbind();
+        dest[0].flush();
+    }
+
 	F32 ms = gBenchmarkProgram.mTimeElapsed/1000000.f;
 	F32 seconds = ms/1000.f;
 
-	F64 samples_drawn = res*res*count*results.size();
+    F64 samples_drawn = gBenchmarkProgram.mSamplesDrawn;
 	F32 samples_sec = (samples_drawn/1000000000.0)/seconds;
-	gbps = samples_sec*8;
+	gbps = samples_sec*4;  // 4 bytes per sample
 
-	LL_INFOS("Benchmark") << "Memory bandwidth is " << llformat("%.3f", gbps) << "GB/sec according to ARB_timer_query, total time " << seconds << " seconds" << LL_ENDL;
+	LL_INFOS("Benchmark") << "Memory bandwidth is " << llformat("%.3f", gbps) << " GB/sec according to ARB_timer_query, total time " << seconds << " seconds" << LL_ENDL;
 
 	return gbps;
 }