diff --git a/modules/compiler/builtins/abacus/generate/abacus_detail_common/abacus_detail_common.in b/modules/compiler/builtins/abacus/generate/abacus_detail_common/abacus_detail_common.in index e7ad27863..29647b932 100644 --- a/modules/compiler/builtins/abacus/generate/abacus_detail_common/abacus_detail_common.in +++ b/modules/compiler/builtins/abacus/generate/abacus_detail_common/abacus_detail_common.in @@ -115,8 +115,35 @@ template T degrees(const T& t) { return t * T(57.295779513082320876798154814105170332405472466564321); } -template T mix(const T& x, const T& y, const U& a) { - return x + ((y - x) * T(a)); +template +T mix(const T &a, const T &b, const T &t) { + // Unoptimized vector version of the implementation of std::lerp given in + // https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p0811r3.html + // adjusted to support non-finite arguments. + // + // Unlike std::lerp, OpenCL mix returns an undefined result if + // !(t >= 0 && t <= 1), so we could simplify slightly, but we aim to behave + // sensibly for this. + + using SignedType = typename TypeTraits::SignedType; + + // The direct translation of the expression mix/lerp is defined as. This + // version is used in most cases, but has issues around t=1 and does not + // account for the possibility b-a overflows. + const T x = a + t * (b - a); + + // Correct for the issues around t=1 by returning b in edge cases. + const SignedType useb = + __abacus_select(b <= x, x <= b, (t > T(1)) == (b > a)) | + ((t == T(1)) & (x == x)); + + // Alternative definition that avoids overflow of b-a. This version is used + // when a and b are finite numbers with different signs. + const T y = t * b + (T(1) - t) * a; + const SignedType usey = isfinite(a) & isfinite(b) & + (((a <= 0) & (b >= 0)) | ((a >= 0) & (b <= 0))); + + return __abacus_select(__abacus_select(x, b, useb), y, usey); } template T radians(const T& t) { diff --git a/modules/compiler/builtins/abacus/source/abacus_common/mix.cpp b/modules/compiler/builtins/abacus/source/abacus_common/mix.cpp index e5609378c..6e0bf50a8 100644 --- a/modules/compiler/builtins/abacus/source/abacus_common/mix.cpp +++ b/modules/compiler/builtins/abacus/source/abacus_common/mix.cpp @@ -21,9 +21,9 @@ TYPE __abacus_mix(TYPE x, TYPE y, TYPE a) { \ return abacus::detail::common::mix(x, y, a); \ } -#define DEF2(TYPE, TYPE2) \ - TYPE __abacus_mix(TYPE x, TYPE y, TYPE2 a) { \ - return abacus::detail::common::mix(x, y, a); \ +#define DEF2(TYPE, TYPE2) \ + TYPE __abacus_mix(TYPE x, TYPE y, TYPE2 a) { \ + return abacus::detail::common::mix(x, y, a); \ } #ifdef __CA_BUILTINS_HALF_SUPPORT diff --git a/source/cl/test/UnitCL/source/ktst_precision.cpp b/source/cl/test/UnitCL/source/ktst_precision.cpp index 5d518d2c6..925ea7a65 100644 --- a/source/cl/test/UnitCL/source/ktst_precision.cpp +++ b/source/cl/test/UnitCL/source/ktst_precision.cpp @@ -1633,24 +1633,12 @@ TEST_P(HalfMathBuiltins, Precision_53_Half_mix) { GTEST_SKIP(); } - // TODO: CA-2882: Vector widths 2 and 4 don't work -#ifdef __arm__ - if ((getParam() == 2) || (getParam() == 4)) { - GTEST_SKIP(); - } -#endif - auto mix_ref = [](cl_float x, cl_float y, cl_float a) -> cl_float { + // Our reference for mix is inaccurate due to intermediate rounding, even if + // we use float rather than half, but because we test with MAX_ULP_ERROR, + // this does not matter for the overall test. cl_float sub = y - x; - // Check for overflow and underflow of intermediate - const cl_half sub_as_half = ConvertFloatToHalf(sub); - if (IsInf(sub_as_half)) { - sub = std::copysign(INFINITY, sub); - } else if (0 == (sub_as_half & ~TypeInfo::sign_bit)) { - sub = std::copysign(0.0f, sub); - } - return x + sub * a; }; @@ -1666,25 +1654,12 @@ TEST_P(HalfMathBuiltins, Precision_53_Half_mix_scalar) { GTEST_SKIP(); } - // TODO: CA-2731: Vector widths 2 and 8 don't work - // TODO: CA-2882: Vector width 4 doesn't work -#ifdef __arm__ - if ((getParam() == 2) || (getParam() == 4) || (getParam() == 8)) { - GTEST_SKIP(); - } -#endif - auto mix_ref = [](cl_float x, cl_float y, cl_float a) -> cl_float { + // Our reference for mix is inaccurate due to intermediate rounding, even if + // we use float rather than half, but because we test with MAX_ULP_ERROR, + // this does not matter for the overall test. cl_float sub = y - x; - // Check for overflow and underflow of intermediate - const cl_half sub_as_half = ConvertFloatToHalf(sub); - if (IsInf(sub_as_half)) { - sub = std::copysign(INFINITY, sub); - } else if (0 == (sub_as_half & ~TypeInfo::sign_bit)) { - sub = std::copysign(0.0f, sub); - } - return x + sub * a; };