fp16: disable SSE if we're on ARM.
This commit is contained in:
parent
8a2618d2ab
commit
906d91e84a
2 changed files with 6 additions and 0 deletions
2
fp16.c
2
fp16.c
|
@@ -416,6 +416,7 @@ FP16 approx_float_to_half(FP32 f)
|
||||||
return o;
|
return o;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef NEON_OPTS
|
||||||
// round-half-up (same as ISPC)
|
// round-half-up (same as ISPC)
|
||||||
__m128i float_to_half_SSE2(__m128 f)
|
__m128i float_to_half_SSE2(__m128 f)
|
||||||
{
|
{
|
||||||
|
@@ -547,6 +548,7 @@ __m128i approx_float_to_half_SSE2(__m128 f)
|
||||||
|
|
||||||
#undef CONSTF
|
#undef CONSTF
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// from fox toolkit float->half code (which "approx" variants match)
|
// from fox toolkit float->half code (which "approx" variants match)
|
||||||
static uint basetable[512];
|
static uint basetable[512];
|
||||||
|
|
4
fp16.h
4
fp16.h
|
@@ -106,7 +106,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#ifndef NEON_OPTS
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef unsigned int uint;
|
typedef unsigned int uint;
|
||||||
|
|
||||||
|
@@ -144,9 +146,11 @@ FP16 float_to_half_fast2(FP32 f);
|
||||||
FP16 float_to_half_fast3(FP32 f);
|
FP16 float_to_half_fast3(FP32 f);
|
||||||
FP16 float_to_half_fast3_rtne(FP32 f);
|
FP16 float_to_half_fast3_rtne(FP32 f);
|
||||||
FP16 approx_float_to_half(FP32 f);
|
FP16 approx_float_to_half(FP32 f);
|
||||||
|
#ifndef NEON_OPTS
|
||||||
__m128i float_to_half_SSE2(__m128 f);
|
__m128i float_to_half_SSE2(__m128 f);
|
||||||
__m128i float_to_half_rtne_SSE2(__m128 f);
|
__m128i float_to_half_rtne_SSE2(__m128 f);
|
||||||
__m128i approx_float_to_half_SSE2(__m128 f);
|
__m128i approx_float_to_half_SSE2(__m128 f);
|
||||||
|
#endif
|
||||||
void fp16_generatetables();
|
void fp16_generatetables();
|
||||||
uint float_to_half_foxtk(uint f);
|
uint float_to_half_foxtk(uint f);
|
||||||
FP32 half_to_float(FP16 h);
|
FP32 half_to_float(FP16 h);
|
||||||
|
|
Loading…
Reference in a new issue