fp16: disable SSE if we're on ARM.
This commit is contained in:
parent
8a2618d2ab
commit
906d91e84a
2 changed files with 6 additions and 0 deletions
2
fp16.c
2
fp16.c
|
@@ -416,6 +416,7 @@ FP16 approx_float_to_half(FP32 f)
|
|||
return o;
|
||||
}
|
||||
|
||||
#ifndef NEON_OPTS
|
||||
// round-half-up (same as ISPC)
|
||||
__m128i float_to_half_SSE2(__m128 f)
|
||||
{
|
||||
|
@@ -547,6 +548,7 @@ __m128i approx_float_to_half_SSE2(__m128 f)
|
|||
|
||||
#undef CONSTF
|
||||
}
|
||||
#endif
|
||||
|
||||
// from fox toolkit float->half code (which "approx" variants match)
|
||||
static uint basetable[512];
|
||||
|
|
4
fp16.h
4
fp16.h
|
@@ -106,7 +106,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifndef NEON_OPTS
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
typedef unsigned int uint;
|
||||
|
||||
|
@@ -144,9 +146,11 @@ FP16 float_to_half_fast2(FP32 f);
|
|||
FP16 float_to_half_fast3(FP32 f);
|
||||
FP16 float_to_half_fast3_rtne(FP32 f);
|
||||
FP16 approx_float_to_half(FP32 f);
|
||||
#ifndef NEON_OPTS
|
||||
__m128i float_to_half_SSE2(__m128 f);
|
||||
__m128i float_to_half_rtne_SSE2(__m128 f);
|
||||
__m128i approx_float_to_half_SSE2(__m128 f);
|
||||
#endif
|
||||
void fp16_generatetables();
|
||||
uint float_to_half_foxtk(uint f);
|
||||
FP32 half_to_float(FP16 h);
|
||||
|
|
Loading…
Reference in a new issue