indra/llmath/llmath.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569

/**
 * @file llmath.h
 * @brief Useful math constants and macros.
 *
 * $LicenseInfo:firstyear=2000&license=viewerlgpl$
 * Second Life Viewer Source Code
 * Copyright (C) 2010, Linden Research, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License only.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 * $/LicenseInfo$
 */

#ifndef LLMATH_H
#define LLMATH_H

#include <cmath>
#include <cstdlib>
#include <vector>
#include <limits>
#include "lldefs.h"
//#include "llstl.h" // *TODO: Remove when LLString is gone
//#include "llstring.h" // *TODO: Remove when LLString is gone
// lltut.h uses is_approx_equal_fraction(). This was moved to its own header
// file in llcommon so we can use lltut.h for llcommon tests without making
// llcommon depend on llmath.
#include "is_approx_equal_fraction.h"

// work around for Windows & older gcc non-standard function names.
#if LL_WINDOWS
#include <float.h>
#define llisnan(val)    _isnan(val)
#define llfinite(val)   _finite(val)
#elif (LL_LINUX && __GNUC__ <= 2)
#define llisnan(val)    isnan(val)
#define llfinite(val)   isfinite(val)
#else
#define llisnan(val)    std::isnan(val)
#define llfinite(val)   std::isfinite(val)
#endif

// Single Precision Floating Point Routines
// (There used to be more defined here, but they appeared to be redundant and
// were breaking some other includes. Removed by Falcon, reviewed by Andrew, 11/25/09)
/*#ifndef tanf
#define tanf(x)     ((F32)tan((F64)(x)))
#endif*/

constexpr F32   GRAVITY         = -9.8f;

// mathematical constants
constexpr F32   F_PI        = 3.1415926535897932384626433832795f;
constexpr F32   F_TWO_PI    = 6.283185307179586476925286766559f;
constexpr F32   F_PI_BY_TWO = 1.5707963267948966192313216916398f;
constexpr F32   F_SQRT_TWO_PI = 2.506628274631000502415765284811f;
constexpr F32   F_E         = 2.71828182845904523536f;
constexpr F32   F_SQRT2     = 1.4142135623730950488016887242097f;
constexpr F32   F_SQRT3     = 1.73205080756888288657986402541f;
constexpr F32   OO_SQRT2    = 0.7071067811865475244008443621049f;
constexpr F32   OO_SQRT3    = 0.577350269189625764509f;
constexpr F32   DEG_TO_RAD  = 0.017453292519943295769236907684886f;
constexpr F32   RAD_TO_DEG  = 57.295779513082320876798154814105f;
constexpr F32   F_APPROXIMATELY_ZERO = 0.00001f;
constexpr F32   F_LN10      = 2.3025850929940456840179914546844f;
constexpr F32   OO_LN10     = 0.43429448190325182765112891891661;
constexpr F32   F_LN2       = 0.69314718056f;
constexpr F32   OO_LN2      = 1.4426950408889634073599246810019f;

constexpr F32   F_ALMOST_ZERO   = 0.0001f;
constexpr F32   F_ALMOST_ONE    = 1.0f - F_ALMOST_ZERO;

constexpr F32   GIMBAL_THRESHOLD = 0.000436f; // sets the gimballock threshold 0.025 away from +/-90 degrees
// formula: GIMBAL_THRESHOLD = sin(DEG_TO_RAD * gimbal_threshold_angle);

// BUG: Eliminate in favor of F_APPROXIMATELY_ZERO above?
constexpr F32 FP_MAG_THRESHOLD = 0.0000001f;

// TODO: Replace with logic like is_approx_equal
inline bool is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); }

// These functions work by interpreting sign+exp+mantissa as an unsigned
// integer.
// For example:
// x = <sign>1 <exponent>00000010 <mantissa>00000000000000000000000
// y = <sign>1 <exponent>00000001 <mantissa>11111111111111111111111
//
// interpreted as ints =
// x = 10000001000000000000000000000000
// y = 10000000111111111111111111111111
// which is clearly a different of 1 in the least significant bit
// Values with the same exponent can be trivially shown to work.
//
// WARNING: Denormals of opposite sign do not work
// x = <sign>1 <exponent>00000000 <mantissa>00000000000000000000001
// y = <sign>0 <exponent>00000000 <mantissa>00000000000000000000001
// Although these values differ by 2 in the LSB, the sign bit makes
// the int comparison fail.
//
// WARNING: NaNs can compare equal
// There is no special treatment of exceptional values like NaNs
//
// WARNING: Infinity is comparable with F32_MAX and negative
// infinity is comparable with F32_MIN

// handles negative and positive zeros
inline bool is_zero(F32 x)
{
    return (*(U32*)(&x) & 0x7fffffff) == 0;
}

inline bool is_approx_equal(F32 x, F32 y)
{
    constexpr S32 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
    return (std::abs((S32) ((U32&)x - (U32&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
}

inline bool is_approx_equal(F64 x, F64 y)
{
    constexpr S64 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
    return (std::abs((S32) ((U64&)x - (U64&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
}

inline S32 llabs(const S32 a)
{
    return S32(std::labs(a));
}

inline F32 llabs(const F32 a)
{
    return F32(std::fabs(a));
}

inline F64 llabs(const F64 a)
{
    return F64(std::fabs(a));
}

inline S32 lltrunc( F32 f )
{
#if LL_WINDOWS && !defined( __INTEL_COMPILER ) && (ADDRESS_SIZE == 32)
        // Avoids changing the floating point control word.
        // Add or subtract 0.5 - epsilon and then round
        const static U32 zpfp[] = { 0xBEFFFFFF, 0x3EFFFFFF };
        S32 result;
        __asm {
            fld     f
            mov     eax,    f
            shr     eax,    29
            and     eax,    4
            fadd    dword ptr [zpfp + eax]
            fistp   result
        }
        return result;
#else
        return (S32)f;
#endif
}

inline S32 lltrunc( F64 f )
{
    return (S32)f;
}

inline S32 llfloor( F32 f )
{
#if LL_WINDOWS && !defined( __INTEL_COMPILER ) && (ADDRESS_SIZE == 32)
        // Avoids changing the floating point control word.
        // Accurate (unlike Stereopsis version) for all values between S32_MIN and S32_MAX and slightly faster than Stereopsis version.
        // Add -(0.5 - epsilon) and then round
        const U32 zpfp = 0xBEFFFFFF;
        S32 result;
        __asm {
            fld     f
            fadd    dword ptr [zpfp]
            fistp   result
        }
        return result;
#else
        return (S32)floor(f);
#endif
}


inline S32 llceil( F32 f )
{
    // This could probably be optimized, but this works.
    return (S32)ceil(f);
}


#ifndef BOGUS_ROUND
// Use this round.  Does an arithmetic round (0.5 always rounds up)
inline S32 ll_round(const F32 val)
{
    return llfloor(val + 0.5f);
}

#else // BOGUS_ROUND
// Old ll_round implementation - does banker's round (toward nearest even in the case of a 0.5.
// Not using this because we don't have a consistent implementation on both platforms, use
// llfloor(val + 0.5f), which is consistent on all platforms.
inline S32 ll_round(const F32 val)
{
    #if LL_WINDOWS
        // Note: assumes that the floating point control word is set to rounding mode (the default)
        S32 ret_val;
        _asm fld    val
        _asm fistp  ret_val;
        return ret_val;
    #elif LL_LINUX
        // Note: assumes that the floating point control word is set
        // to rounding mode (the default)
        S32 ret_val;
        __asm__ __volatile__( "flds %1    \n\t"
                              "fistpl %0  \n\t"
                              : "=m" (ret_val)
                              : "m" (val) );
        return ret_val;
    #else
        return llfloor(val + 0.5f);
    #endif
}

// A fast arithmentic round on intel, from Laurent de Soras http://ldesoras.free.fr
inline int round_int(double x)
{
    const float round_to_nearest = 0.5f;
    int i;
    __asm
    {
        fld x
        fadd st, st (0)
        fadd round_to_nearest
        fistp i
        sar i, 1
    }
    return (i);
}
#endif // BOGUS_ROUND

inline F64 ll_round(const F64 val)
{
    return F64(floor(val + 0.5f));
}

inline F32 ll_round( F32 val, F32 nearest )
{
    return F32(floor(val * (1.0f / nearest) + 0.5f)) * nearest;
}

inline F64 ll_round( F64 val, F64 nearest )
{
    return F64(floor(val * (1.0 / nearest) + 0.5)) * nearest;
}

// these provide minimum peak error
//
// avg  error = -0.013049
// peak error = -31.4 dB
// RMS  error = -28.1 dB

constexpr F32 FAST_MAG_ALPHA = 0.960433870103f;
constexpr F32 FAST_MAG_BETA = 0.397824734759f;

// these provide minimum RMS error
//
// avg  error = 0.000003
// peak error = -32.6 dB
// RMS  error = -25.7 dB
//
//constexpr F32 FAST_MAG_ALPHA = 0.948059448969f;
//constexpr F32 FAST_MAG_BETA = 0.392699081699f;

inline F32 fastMagnitude(F32 a, F32 b)
{
    a = (a > 0) ? a : -a;
    b = (b > 0) ? b : -b;
    return(FAST_MAG_ALPHA * llmax(a,b) + FAST_MAG_BETA * llmin(a,b));
}


////////////////////
//
// Fast F32/S32 conversions
//
// Culled from www.stereopsis.com/FPU.html

constexpr F64 LL_DOUBLE_TO_FIX_MAGIC    = 68719476736.0*1.5;     //2^36 * 1.5,  (52-_shiftamt=36) uses limited precisicion to floor
constexpr S32 LL_SHIFT_AMOUNT           = 16;                    //16.16 fixed point representation,

// Endian dependent code
#ifdef LL_LITTLE_ENDIAN
    #define LL_EXP_INDEX                1
    #define LL_MAN_INDEX                0
#else
    #define LL_EXP_INDEX                0
    #define LL_MAN_INDEX                1
#endif

////////////////////////////////////////////////
//
// Fast exp and log
//

// Implementation of fast exp() approximation (from a paper by Nicol N. Schraudolph
// http://www.inf.ethz.ch/~schraudo/pubs/exp.pdf
static union
{
    double d;
    struct
    {
#ifdef LL_LITTLE_ENDIAN
        S32 j, i;
#else
        S32 i, j;
#endif
    } n;
} LLECO; // not sure what the name means

#define LL_EXP_A (1048576 * OO_LN2) // use 1512775 for integer
#define LL_EXP_C (60801)            // this value of C good for -4 < y < 4

#define LL_FAST_EXP(y) (LLECO.n.i = ll_round(F32(LL_EXP_A*(y))) + (1072693248 - LL_EXP_C), LLECO.d)

inline F32 llfastpow(const F32 x, const F32 y)
{
    return (F32)(LL_FAST_EXP(y * log(x)));
}


inline F32 snap_to_sig_figs(F32 foo, S32 sig_figs)
{
    // compute the power of ten
    F32 bar = 1.f;
    for (S32 i = 0; i < sig_figs; i++)
    {
        bar *= 10.f;
    }

    F32 sign = (foo > 0.f) ? 1.f : -1.f;
    F32 new_foo = F32( S64(foo * bar + sign * 0.5f));
    new_foo /= bar;

    return new_foo;
}

inline F32 lerp(F32 a, F32 b, F32 u)
{
    return a + ((b - a) * u);
}

inline F32 lerp2d(F32 x00, F32 x01, F32 x10, F32 x11, F32 u, F32 v)
{
    F32 a = x00 + (x01-x00)*u;
    F32 b = x10 + (x11-x10)*u;
    F32 r = a + (b-a)*v;
    return r;
}

inline F32 ramp(F32 x, F32 a, F32 b)
{
    return (a == b) ? 0.0f : ((a - x) / (a - b));
}

inline F32 rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
{
    return lerp(y1, y2, ramp(x, x1, x2));
}

inline F32 clamp_rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
{
    if (y1 < y2)
    {
        return llclamp(rescale(x,x1,x2,y1,y2),y1,y2);
    }
    else
    {
        return llclamp(rescale(x,x1,x2,y1,y2),y2,y1);
    }
}


inline F32 cubic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
{
    if (x <= x0)
        return s0;

    if (x >= x1)
        return s1;

    F32 f = (x - x0) / (x1 - x0);

    return  s0 + (s1 - s0) * (f * f) * (3.0f - 2.0f * f);
}

inline F32 cubic_step( F32 x )
{
    x = llclampf(x);

    return  (x * x) * (3.0f - 2.0f * x);
}

inline F32 quadratic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
{
    if (x <= x0)
        return s0;

    if (x >= x1)
        return s1;

    F32 f = (x - x0) / (x1 - x0);
    F32 f_squared = f * f;

    return  (s0 * (1.f - f_squared)) + ((s1 - s0) * f_squared);
}

inline F32 llsimple_angle(F32 angle)
{
    while(angle <= -F_PI)
        angle += F_TWO_PI;
    while(angle >  F_PI)
        angle -= F_TWO_PI;
    return angle;
}

//SDK - Renamed this to get_lower_power_two, since this is what this actually does.
inline U32 get_lower_power_two(U32 val, U32 max_power_two)
{
    if(!max_power_two)
    {
        max_power_two = 1 << 31 ;
    }
    if(max_power_two & (max_power_two - 1))
    {
        return 0 ;
    }

    for(; val < max_power_two ; max_power_two >>= 1) ;

    return max_power_two ;
}

// calculate next highest power of two, limited by max_power_two
// This is taken from a brilliant little code snipped on http://acius2.blogspot.com/2007/11/calculating-next-power-of-2.html
// Basically we convert the binary to a solid string of 1's with the same
// number of digits, then add one.  We subtract 1 initially to handle
// the case where the number passed in is actually a power of two.
// WARNING: this only works with 32 bit ints.
inline U32 get_next_power_two(U32 val, U32 max_power_two)
{
    if(!max_power_two)
    {
        max_power_two = 1 << 31 ;
    }

    if(val >= max_power_two)
    {
        return max_power_two;
    }

    val--;
    val = (val >> 1) | val;
    val = (val >> 2) | val;
    val = (val >> 4) | val;
    val = (val >> 8) | val;
    val = (val >> 16) | val;
    val++;

    return val;
}

//get the gaussian value given the linear distance from axis x and guassian value o
inline F32 llgaussian(F32 x, F32 o)
{
    return 1.f/(F_SQRT_TWO_PI*o)*powf(F_E, -(x*x)/(2*o*o));
}

//helper function for removing outliers
template <class VEC_TYPE>
inline void ll_remove_outliers(std::vector<VEC_TYPE>& data, F32 k)
{
    if (data.size() < 100)
    { //not enough samples
        return;
    }

    VEC_TYPE Q1 = data[data.size()/4];
    VEC_TYPE Q3 = data[data.size()-data.size()/4-1];

    if ((F32)(Q3-Q1) < 1.f)
    {
        // not enough variation to detect outliers
        return;
    }


    VEC_TYPE min = (VEC_TYPE) ((F32) Q1-k * (F32) (Q3-Q1));
    VEC_TYPE max = (VEC_TYPE) ((F32) Q3+k * (F32) (Q3-Q1));

    U32 i = 0;
    while (i < data.size() && data[i] < min)
    {
        i++;
    }

    size_t j = data.size()-1;
    while (j > 0 && data[j] > max)
    {
        j--;
    }

    if (j < data.size()-1)
    {
        data.erase(data.begin()+j, data.end());
    }

    if (i > 0)
    {
        data.erase(data.begin(), data.begin()+i);
    }
}

// Converts given value from a linear RGB floating point value (0..1) to a gamma corrected (sRGB) value.
// Some shaders require color values in linear space, while others require color values in gamma corrected (sRGB) space.
// Note: in our code, values labeled as sRGB are ALWAYS gamma corrected linear values, NOT linear values with monitor gamma applied
// Note: stored color values should always be gamma corrected linear (i.e. the values returned from an on-screen color swatch)
// Note: DO NOT cache the conversion.  This leads to error prone synchronization and is actually slower in the typical case due to cache misses
inline float linearTosRGB(const float val) {
    if (val < 0.0031308f) {
        return val * 12.92f;
    }
    else {
        return 1.055f * pow(val, 1.0f / 2.4f) - 0.055f;
    }
}

// Converts given value from a gamma corrected (sRGB) floating point value (0..1) to a linear color value.
// Some shaders require color values in linear space, while others require color values in gamma corrected (sRGB) space.
// Note: In our code, values labeled as sRGB are gamma corrected linear values, NOT linear values with monitor gamma applied
// Note: Stored color values should generally be gamma corrected sRGB.
//       If you're serializing the return value of this function, you're probably doing it wrong.
// Note: DO NOT cache the conversion.  This leads to error prone synchronization and is actually slower in the typical case due to cache misses.
inline float sRGBtoLinear(const float val) {
    if (val < 0.04045f) {
        return val / 12.92f;
    }
    else {
        return pow((val + 0.055f) / 1.055f, 2.4f);
    }
}

// Include simd math header
#include "llsimdmath.h"

#endif