X-Git-Url: http://vcs.maemo.org/git/?a=blobdiff_plain;f=target-ppc%2Fop_helper.c;h=3afd217bc207dda138e6d4d9e555cc1245e04a71;hb=c7eb95e1180c524a9ef97e3fdfc8ad5d40b0cebb;hp=367c36682dade1aba9de7848ad138b8c076f543b;hpb=e40393399a4ed120deee90b0fdd5afbd8aa2301e;p=qemu

diff --git a/target-ppc/op_helper.c b/target-ppc/op_helper.c
index 367c366..3afd217 100644
--- a/target-ppc/op_helper.c
+++ b/target-ppc/op_helper.c
@@ -15,8 +15,9 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
+#include <string.h>
 #include "exec.h"
 #include "host-utils.h"
 #include "helper.h"
@@ -27,6 +28,13 @@
 //#define DEBUG_EXCEPTIONS
 //#define DEBUG_SOFTWARE_TLB
 
+#ifdef DEBUG_SOFTWARE_TLB
+#  define LOG_SWTLB(...) qemu_log(__VA_ARGS__)
+#else
+#  define LOG_SWTLB(...) do { } while (0)
+#endif
+
+
 /*****************************************************************************/
 /* Exceptions processing helpers */
 
@@ -73,18 +81,14 @@ void helper_store_cr (target_ulong val, uint32_t mask)
 /* SPR accesses */
 void helper_load_dump_spr (uint32_t sprn)
 {
-    if (loglevel != 0) {
-        fprintf(logfile, "Read SPR %d %03x => " ADDRX "\n",
+    qemu_log("Read SPR %d %03x => " ADDRX "\n",
                 sprn, sprn, env->spr[sprn]);
-    }
 }
 
 void helper_store_dump_spr (uint32_t sprn)
 {
-    if (loglevel != 0) {
-        fprintf(logfile, "Write SPR %d %03x <= " ADDRX "\n",
+    qemu_log("Write SPR %d %03x <= " ADDRX "\n",
                 sprn, sprn, env->spr[sprn]);
-    }
 }
 
 target_ulong helper_load_tbl (void)
@@ -181,10 +185,8 @@ void helper_store_hid0_601 (target_ulong val)
         env->hflags_nmsr &= ~(1 << MSR_LE);
         env->hflags_nmsr |= (1 << MSR_LE) & (((val >> 3) & 1) << MSR_LE);
         env->hflags |= env->hflags_nmsr;
-        if (loglevel != 0) {
-            fprintf(logfile, "%s: set endianness to %c => " ADDRX "\n",
+        qemu_log("%s: set endianness to %c => " ADDRX "\n",
                     __func__, val & 0x8 ? 'l' : 'b', env->hflags);
-        }
     }
     env->spr[SPR_HID0] = (uint32_t)val;
 }
@@ -1499,15 +1501,14 @@ uint64_t helper_fsqrt (uint64_t arg)
 /* fre - fre. */
 uint64_t helper_fre (uint64_t arg)
 {
-    CPU_DoubleU fone, farg;
-    fone.ll = 0x3FF0000000000000ULL; /* 1.0 */
+    CPU_DoubleU farg;
     farg.ll = arg;
 
     if (unlikely(float64_is_signaling_nan(farg.d))) {
         /* sNaN reciprocal */
         farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN);
     } else {
-        farg.d = float64_div(fone.d, farg.d, &env->fp_status);
+        farg.d = float64_div(float64_one, farg.d, &env->fp_status);
     }
     return farg.d;
 }
@@ -1515,16 +1516,15 @@ uint64_t helper_fre (uint64_t arg)
 /* fres - fres. */
 uint64_t helper_fres (uint64_t arg)
 {
-    CPU_DoubleU fone, farg;
+    CPU_DoubleU farg;
     float32 f32;
-    fone.ll = 0x3FF0000000000000ULL; /* 1.0 */
     farg.ll = arg;
 
     if (unlikely(float64_is_signaling_nan(farg.d))) {
         /* sNaN reciprocal */
         farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN);
     } else {
-        farg.d = float64_div(fone.d, farg.d, &env->fp_status);
+        farg.d = float64_div(float64_one, farg.d, &env->fp_status);
         f32 = float64_to_float32(farg.d, &env->fp_status);
         farg.d = float32_to_float64(f32, &env->fp_status);
     }
@@ -1534,9 +1534,8 @@ uint64_t helper_fres (uint64_t arg)
 /* frsqrte - frsqrte.
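  * The estimate is modelled here as the exact operation: compute
  * 1/sqrt(x) at double precision, then round the result to single
  * precision.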
*/ uint64_t helper_frsqrte (uint64_t arg) { - CPU_DoubleU fone, farg; + CPU_DoubleU farg; float32 f32; - fone.ll = 0x3FF0000000000000ULL; /* 1.0 */ farg.ll = arg; if (unlikely(float64_is_signaling_nan(farg.d))) { @@ -1547,7 +1546,7 @@ uint64_t helper_frsqrte (uint64_t arg) farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSQRT); } else { farg.d = float64_sqrt(farg.d, &env->fp_status); - farg.d = float64_div(fone.d, farg.d, &env->fp_status); + farg.d = float64_div(float64_one, farg.d, &env->fp_status); f32 = float64_to_float32(farg.d, &env->fp_status); farg.d = float32_to_float64(f32, &env->fp_status); } @@ -1859,15 +1858,11 @@ target_ulong helper_load_dcr (target_ulong dcrn) target_ulong val = 0; if (unlikely(env->dcr_env == NULL)) { - if (loglevel != 0) { - fprintf(logfile, "No DCR environment\n"); - } + qemu_log("No DCR environment\n"); helper_raise_exception_err(POWERPC_EXCP_PROGRAM, POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL); } else if (unlikely(ppc_dcr_read(env->dcr_env, dcrn, &val) != 0)) { - if (loglevel != 0) { - fprintf(logfile, "DCR read error %d %03x\n", (int)dcrn, (int)dcrn); - } + qemu_log("DCR read error %d %03x\n", (int)dcrn, (int)dcrn); helper_raise_exception_err(POWERPC_EXCP_PROGRAM, POWERPC_EXCP_INVAL | POWERPC_EXCP_PRIV_REG); } @@ -1877,15 +1872,11 @@ target_ulong helper_load_dcr (target_ulong dcrn) void helper_store_dcr (target_ulong dcrn, target_ulong val) { if (unlikely(env->dcr_env == NULL)) { - if (loglevel != 0) { - fprintf(logfile, "No DCR environment\n"); - } + qemu_log("No DCR environment\n"); helper_raise_exception_err(POWERPC_EXCP_PROGRAM, POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL); } else if (unlikely(ppc_dcr_write(env->dcr_env, dcrn, val) != 0)) { - if (loglevel != 0) { - fprintf(logfile, "DCR write error %d %03x\n", (int)dcrn, (int)dcrn); - } + qemu_log("DCR write error %d %03x\n", (int)dcrn, (int)dcrn); helper_raise_exception_err(POWERPC_EXCP_PROGRAM, POWERPC_EXCP_INVAL | POWERPC_EXCP_PRIV_REG); } @@ -1971,6 +1962,126 @@ target_ulong helper_dlmzb (target_ulong high, target_ulong low, uint32_t update_ for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--) #endif +/* If X is a NaN, store the corresponding QNaN into RESULT. Otherwise, + * execute the following block. */ +#define DO_HANDLE_NAN(result, x) \ + if (float32_is_nan(x) || float32_is_signaling_nan(x)) { \ + CPU_FloatU __f; \ + __f.f = x; \ + __f.l = __f.l | (1 << 22); /* Set QNaN bit. */ \ + result = __f.f; \ + } else + +#define HANDLE_NAN1(result, x) \ + DO_HANDLE_NAN(result, x) +#define HANDLE_NAN2(result, x, y) \ + DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) +#define HANDLE_NAN3(result, x, y, z) \ + DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) DO_HANDLE_NAN(result, z) + +/* Saturating arithmetic helpers. 
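+ * Each SATCVT(from, to, ...) instance defines a narrowing conversion
+ * cvt<from><to>() that clamps its argument to [min, max] and sets
+ * *sat when it had to saturate; e.g. cvtshsb() converts an int16_t
+ * to an int8_t, clamping to [INT8_MIN, INT8_MAX].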
*/ +#define SATCVT(from, to, from_type, to_type, min, max, use_min, use_max) \ + static always_inline to_type cvt##from##to (from_type x, int *sat) \ + { \ + to_type r; \ + if (use_min && x < min) { \ + r = min; \ + *sat = 1; \ + } else if (use_max && x > max) { \ + r = max; \ + *sat = 1; \ + } else { \ + r = x; \ + } \ + return r; \ + } +SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX, 1, 1) +SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX, 1, 1) +SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX, 1, 1) +SATCVT(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX, 0, 1) +SATCVT(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX, 0, 1) +SATCVT(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX, 0, 1) +SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX, 1, 1) +SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX, 1, 1) +SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX, 1, 1) +#undef SATCVT + +#define LVE(name, access, swap, element) \ + void helper_##name (ppc_avr_t *r, target_ulong addr) \ + { \ + size_t n_elems = ARRAY_SIZE(r->element); \ + int adjust = HI_IDX*(n_elems-1); \ + int sh = sizeof(r->element[0]) >> 1; \ + int index = (addr & 0xf) >> sh; \ + if(msr_le) { \ + r->element[LO_IDX ? index : (adjust - index)] = swap(access(addr)); \ + } else { \ + r->element[LO_IDX ? index : (adjust - index)] = access(addr); \ + } \ + } +#define I(x) (x) +LVE(lvebx, ldub, I, u8) +LVE(lvehx, lduw, bswap16, u16) +LVE(lvewx, ldl, bswap32, u32) +#undef I +#undef LVE + +void helper_lvsl (ppc_avr_t *r, target_ulong sh) +{ + int i, j = (sh & 0xf); + + VECTOR_FOR_INORDER_I (i, u8) { + r->u8[i] = j++; + } +} + +void helper_lvsr (ppc_avr_t *r, target_ulong sh) +{ + int i, j = 0x10 - (sh & 0xf); + + VECTOR_FOR_INORDER_I (i, u8) { + r->u8[i] = j++; + } +} + +#define STVE(name, access, swap, element) \ + void helper_##name (ppc_avr_t *r, target_ulong addr) \ + { \ + size_t n_elems = ARRAY_SIZE(r->element); \ + int adjust = HI_IDX*(n_elems-1); \ + int sh = sizeof(r->element[0]) >> 1; \ + int index = (addr & 0xf) >> sh; \ + if(msr_le) { \ + access(addr, swap(r->element[LO_IDX ? index : (adjust - index)])); \ + } else { \ + access(addr, r->element[LO_IDX ? 
index : (adjust - index)]); \
+        }                                                               \
+    }
+#define I(x) (x)
+STVE(stvebx, stb, I, u8)
+STVE(stvehx, stw, bswap16, u16)
+STVE(stvewx, stl, bswap32, u32)
+#undef I
+#undef STVE
+
+void helper_mtvscr (ppc_avr_t *r)
+{
+#if defined(WORDS_BIGENDIAN)
+    env->vscr = r->u32[3];
+#else
+    env->vscr = r->u32[0];
+#endif
+    set_flush_to_zero(vscr_nj, &env->vec_status);
+}
+
+void helper_vaddcuw (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    int i;
+    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
+        r->u32[i] = ~a->u32[i] < b->u32[i];
+    }
+}
+
 #define VARITH_DO(name, op, element)                                    \
 void helper_v##name (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
 {                                                                       \
@@ -1988,6 +2099,59 @@ VARITH(uwm, u32)
 #undef VARITH_DO
 #undef VARITH
 
+#define VARITHFP(suffix, func)                                          \
+    void helper_v##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
+    {                                                                   \
+        int i;                                                          \
+        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
+            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
+                r->f[i] = func(a->f[i], b->f[i], &env->vec_status);     \
+            }                                                           \
+        }                                                               \
+    }
+VARITHFP(addfp, float32_add)
+VARITHFP(subfp, float32_sub)
+#undef VARITHFP
+
+#define VARITHSAT_CASE(type, op, cvt, element)                          \
+    {                                                                   \
+        type result = (type)a->element[i] op (type)b->element[i];       \
+        r->element[i] = cvt(result, &sat);                              \
+    }
+
+#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
+    void helper_v##name (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)      \
+    {                                                                   \
+        int sat = 0;                                                    \
+        int i;                                                          \
+        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
+            switch (sizeof(r->element[0])) {                            \
+            case 1: VARITHSAT_CASE(optype, op, cvt, element); break;    \
+            case 2: VARITHSAT_CASE(optype, op, cvt, element); break;    \
+            case 4: VARITHSAT_CASE(optype, op, cvt, element); break;    \
+            }                                                           \
+        }                                                               \
+        if (sat) {                                                      \
+            env->vscr |= (1 << VSCR_SAT);                               \
+        }                                                               \
+    }
+#define VARITHSAT_SIGNED(suffix, element, optype, cvt)                  \
+    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)              \
+    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
+#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)                \
+    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)              \
+    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
+VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
+VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
+VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
+VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
+VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
+VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
+#undef VARITHSAT_CASE
+#undef VARITHSAT_DO
+#undef VARITHSAT_SIGNED
+#undef VARITHSAT_UNSIGNED
+
 #define VAVG_DO(name, element, etype)                                   \
     void helper_v##name (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)      \
     {                                                                   \
@@ -2007,6 +2171,200 @@ VAVG(w, s32, int64_t, u32, uint64_t)
 #undef VAVG_DO
 #undef VAVG
 
+#define VCF(suffix, cvt, element)                                       \
+    void helper_vcf##suffix (ppc_avr_t *r, ppc_avr_t *b, uint32_t uim)  \
+    {                                                                   \
+        int i;                                                          \
+        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
+            float32 t = cvt(b->element[i], &env->vec_status);           \
+            r->f[i] = float32_scalbn (t, -uim, &env->vec_status);       \
+        }                                                               \
+    }
+VCF(ux, uint32_to_float32, u32)
+VCF(sx, int32_to_float32, s32)
+#undef VCF
+
+#define VCMP_DO(suffix, compare, element, record)                       \
+    void helper_vcmp##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
+    {                                                                   \
+        uint32_t ones = (uint32_t)-1;                                   \
+        uint32_t all = ones;                                            \
+        uint32_t none = 0;                                              \
+        int i;                                                          \
+        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
+            uint32_t result = (a->element[i] compare b->element[i] ?
ones : 0x0); \ + switch (sizeof (a->element[0])) { \ + case 4: r->u32[i] = result; break; \ + case 2: r->u16[i] = result; break; \ + case 1: r->u8[i] = result; break; \ + } \ + all &= result; \ + none |= result; \ + } \ + if (record) { \ + env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ + } \ + } +#define VCMP(suffix, compare, element) \ + VCMP_DO(suffix, compare, element, 0) \ + VCMP_DO(suffix##_dot, compare, element, 1) +VCMP(equb, ==, u8) +VCMP(equh, ==, u16) +VCMP(equw, ==, u32) +VCMP(gtub, >, u8) +VCMP(gtuh, >, u16) +VCMP(gtuw, >, u32) +VCMP(gtsb, >, s8) +VCMP(gtsh, >, s16) +VCMP(gtsw, >, s32) +#undef VCMP_DO +#undef VCMP + +#define VCMPFP_DO(suffix, compare, order, record) \ + void helper_vcmp##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + uint32_t ones = (uint32_t)-1; \ + uint32_t all = ones; \ + uint32_t none = 0; \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ + uint32_t result; \ + int rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status); \ + if (rel == float_relation_unordered) { \ + result = 0; \ + } else if (rel compare order) { \ + result = ones; \ + } else { \ + result = 0; \ + } \ + r->u32[i] = result; \ + all &= result; \ + none |= result; \ + } \ + if (record) { \ + env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ + } \ + } +#define VCMPFP(suffix, compare, order) \ + VCMPFP_DO(suffix, compare, order, 0) \ + VCMPFP_DO(suffix##_dot, compare, order, 1) +VCMPFP(eqfp, ==, float_relation_equal) +VCMPFP(gefp, !=, float_relation_less) +VCMPFP(gtfp, ==, float_relation_greater) +#undef VCMPFP_DO +#undef VCMPFP + +static always_inline void vcmpbfp_internal (ppc_avr_t *r, ppc_avr_t *a, + ppc_avr_t *b, int record) +{ + int i; + int all_in = 0; + for (i = 0; i < ARRAY_SIZE(r->f); i++) { + int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status); + if (le_rel == float_relation_unordered) { + r->u32[i] = 0xc0000000; + /* ALL_IN does not need to be updated here. */ + } else { + float32 bneg = float32_chs(b->f[i]); + int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status); + int le = le_rel != float_relation_greater; + int ge = ge_rel != float_relation_less; + r->u32[i] = ((!le) << 31) | ((!ge) << 30); + all_in |= (!le | !ge); + } + } + if (record) { + env->crf[6] = (all_in == 0) << 1; + } +} + +void helper_vcmpbfp (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + vcmpbfp_internal(r, a, b, 0); +} + +void helper_vcmpbfp_dot (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + vcmpbfp_internal(r, a, b, 1); +} + +#define VCT(suffix, satcvt, element) \ + void helper_vct##suffix (ppc_avr_t *r, ppc_avr_t *b, uint32_t uim) \ + { \ + int i; \ + int sat = 0; \ + float_status s = env->vec_status; \ + set_float_rounding_mode(float_round_to_zero, &s); \ + for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ + if (float32_is_nan(b->f[i]) || \ + float32_is_signaling_nan(b->f[i])) { \ + r->element[i] = 0; \ + } else { \ + float64 t = float32_to_float64(b->f[i], &s); \ + int64_t j; \ + t = float64_scalbn(t, uim, &s); \ + j = float64_to_int64(t, &s); \ + r->element[i] = satcvt(j, &sat); \ + } \ + } \ + if (sat) { \ + env->vscr |= (1 << VSCR_SAT); \ + } \ + } +VCT(uxs, cvtsduw, u32) +VCT(sxs, cvtsdsw, s32) +#undef VCT + +void helper_vmaddfp (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->f); i++) { + HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) { + /* Need to do the computation in higher precision and round + * once at the end. 
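+             * Doing the multiply in float64 keeps the float32 product
+             * exact (two 24-bit significands fit in 53 bits), avoiding
+             * the intermediate rounding a float32 multiply-then-add
+             * sequence would introduce.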
*/ + float64 af, bf, cf, t; + af = float32_to_float64(a->f[i], &env->vec_status); + bf = float32_to_float64(b->f[i], &env->vec_status); + cf = float32_to_float64(c->f[i], &env->vec_status); + t = float64_mul(af, cf, &env->vec_status); + t = float64_add(t, bf, &env->vec_status); + r->f[i] = float64_to_float32(t, &env->vec_status); + } + } +} + +void helper_vmhaddshs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int sat = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(r->s16); i++) { + int32_t prod = a->s16[i] * b->s16[i]; + int32_t t = (int32_t)c->s16[i] + (prod >> 15); + r->s16[i] = cvtswsh (t, &sat); + } + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +void helper_vmhraddshs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int sat = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(r->s16); i++) { + int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; + int32_t t = (int32_t)c->s16[i] + (prod >> 15); + r->s16[i] = cvtswsh (t, &sat); + } + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + #define VMINMAX_DO(name, compare, element) \ void helper_v##name (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ @@ -2031,6 +2389,679 @@ VMINMAX(uw, u32) #undef VMINMAX_DO #undef VMINMAX +#define VMINMAXFP(suffix, rT, rF) \ + void helper_v##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ + HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) { \ + if (float32_lt_quiet(a->f[i], b->f[i], &env->vec_status)) { \ + r->f[i] = rT->f[i]; \ + } else { \ + r->f[i] = rF->f[i]; \ + } \ + } \ + } \ + } +VMINMAXFP(minfp, a, b) +VMINMAXFP(maxfp, b, a) +#undef VMINMAXFP + +void helper_vmladduhm (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->s16); i++) { + int32_t prod = a->s16[i] * b->s16[i]; + r->s16[i] = (int16_t) (prod + c->s16[i]); + } +} + +#define VMRG_DO(name, element, highp) \ + void helper_v##name (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + ppc_avr_t result; \ + int i; \ + size_t n_elems = ARRAY_SIZE(r->element); \ + for (i = 0; i < n_elems/2; i++) { \ + if (highp) { \ + result.element[i*2+HI_IDX] = a->element[i]; \ + result.element[i*2+LO_IDX] = b->element[i]; \ + } else { \ + result.element[n_elems - i*2 - (1+HI_IDX)] = b->element[n_elems - i - 1]; \ + result.element[n_elems - i*2 - (1+LO_IDX)] = a->element[n_elems - i - 1]; \ + } \ + } \ + *r = result; \ + } +#if defined(WORDS_BIGENDIAN) +#define MRGHI 0 +#define MRGLO 1 +#else +#define MRGHI 1 +#define MRGLO 0 +#endif +#define VMRG(suffix, element) \ + VMRG_DO(mrgl##suffix, element, MRGHI) \ + VMRG_DO(mrgh##suffix, element, MRGLO) +VMRG(b, u8) +VMRG(h, u16) +VMRG(w, u32) +#undef VMRG_DO +#undef VMRG +#undef MRGHI +#undef MRGLO + +void helper_vmsummbm (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int32_t prod[16]; + int i; + + for (i = 0; i < ARRAY_SIZE(r->s8); i++) { + prod[i] = (int32_t)a->s8[i] * b->u8[i]; + } + + VECTOR_FOR_INORDER_I(i, s32) { + r->s32[i] = c->s32[i] + prod[4*i] + prod[4*i+1] + prod[4*i+2] + prod[4*i+3]; + } +} + +void helper_vmsumshm (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int32_t prod[8]; + int i; + + for (i = 0; i < ARRAY_SIZE(r->s16); i++) { + prod[i] = a->s16[i] * b->s16[i]; + } + + VECTOR_FOR_INORDER_I(i, s32) { + r->s32[i] = c->s32[i] + prod[2*i] + prod[2*i+1]; + } +} + +void helper_vmsumshs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int32_t prod[8]; + int i; + int sat = 0; + + for (i = 0; i < ARRAY_SIZE(r->s16); i++) { + prod[i] = 
(int32_t)a->s16[i] * b->s16[i];
+    }
+
+    VECTOR_FOR_INORDER_I (i, s32) {
+        int64_t t = (int64_t)c->s32[i] + prod[2*i] + prod[2*i+1];
+        r->u32[i] = cvtsdsw(t, &sat);
+    }
+
+    if (sat) {
+        env->vscr |= (1 << VSCR_SAT);
+    }
+}
+
+void helper_vmsumubm (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+{
+    uint16_t prod[16];
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
+        prod[i] = a->u8[i] * b->u8[i];
+    }
+
+    VECTOR_FOR_INORDER_I(i, u32) {
+        r->u32[i] = c->u32[i] + prod[4*i] + prod[4*i+1] + prod[4*i+2] + prod[4*i+3];
+    }
+}
+
+void helper_vmsumuhm (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+{
+    uint32_t prod[8];
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
+        prod[i] = a->u16[i] * b->u16[i];
+    }
+
+    VECTOR_FOR_INORDER_I(i, u32) {
+        r->u32[i] = c->u32[i] + prod[2*i] + prod[2*i+1];
+    }
+}
+
+void helper_vmsumuhs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+{
+    uint32_t prod[8];
+    int i;
+    int sat = 0;
+
+    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
+        prod[i] = a->u16[i] * b->u16[i];
+    }
+
+    VECTOR_FOR_INORDER_I (i, s32) {
+        uint64_t t = (uint64_t)c->u32[i] + prod[2*i] + prod[2*i+1];
+        r->u32[i] = cvtuduw(t, &sat);
+    }
+
+    if (sat) {
+        env->vscr |= (1 << VSCR_SAT);
+    }
+}
+
+#define VMUL_DO(name, mul_element, prod_element, evenp)                 \
+    void helper_v##name (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)      \
+    {                                                                   \
+        int i;                                                          \
+        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
+            if (evenp) {                                                \
+                r->prod_element[i] = a->mul_element[i*2+HI_IDX] * b->mul_element[i*2+HI_IDX]; \
+            } else {                                                    \
+                r->prod_element[i] = a->mul_element[i*2+LO_IDX] * b->mul_element[i*2+LO_IDX]; \
+            }                                                           \
+        }                                                               \
+    }
+#define VMUL(suffix, mul_element, prod_element)                         \
+    VMUL_DO(mule##suffix, mul_element, prod_element, 1)                 \
+    VMUL_DO(mulo##suffix, mul_element, prod_element, 0)
+VMUL(sb, s8, s16)
+VMUL(sh, s16, s32)
+VMUL(ub, u8, u16)
+VMUL(uh, u16, u32)
+#undef VMUL_DO
+#undef VMUL
+
+void helper_vnmsubfp (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+{
+    int i;
+    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
+        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
+            /* Need to do the computation in higher precision and round
+             * once at the end.
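+             * As in helper_vmaddfp above: the float64 intermediate
+             * keeps the product exact until the final negation and
+             * conversion back to float32.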
*/ + float64 af, bf, cf, t; + af = float32_to_float64(a->f[i], &env->vec_status); + bf = float32_to_float64(b->f[i], &env->vec_status); + cf = float32_to_float64(c->f[i], &env->vec_status); + t = float64_mul(af, cf, &env->vec_status); + t = float64_sub(t, bf, &env->vec_status); + t = float64_chs(t); + r->f[i] = float64_to_float32(t, &env->vec_status); + } + } +} + +void helper_vperm (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + ppc_avr_t result; + int i; + VECTOR_FOR_INORDER_I (i, u8) { + int s = c->u8[i] & 0x1f; +#if defined(WORDS_BIGENDIAN) + int index = s & 0xf; +#else + int index = 15 - (s & 0xf); +#endif + if (s & 0x10) { + result.u8[i] = b->u8[index]; + } else { + result.u8[i] = a->u8[index]; + } + } + *r = result; +} + +#if defined(WORDS_BIGENDIAN) +#define PKBIG 1 +#else +#define PKBIG 0 +#endif +void helper_vpkpx (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i, j; + ppc_avr_t result; +#if defined(WORDS_BIGENDIAN) + const ppc_avr_t *x[2] = { a, b }; +#else + const ppc_avr_t *x[2] = { b, a }; +#endif + + VECTOR_FOR_INORDER_I (i, u64) { + VECTOR_FOR_INORDER_I (j, u32){ + uint32_t e = x[i]->u32[j]; + result.u16[4*i+j] = (((e >> 9) & 0xfc00) | + ((e >> 6) & 0x3e0) | + ((e >> 3) & 0x1f)); + } + } + *r = result; +} + +#define VPK(suffix, from, to, cvt, dosat) \ + void helper_vpk##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + int sat = 0; \ + ppc_avr_t result; \ + ppc_avr_t *a0 = PKBIG ? a : b; \ + ppc_avr_t *a1 = PKBIG ? b : a; \ + VECTOR_FOR_INORDER_I (i, from) { \ + result.to[i] = cvt(a0->from[i], &sat); \ + result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \ + } \ + *r = result; \ + if (dosat && sat) { \ + env->vscr |= (1 << VSCR_SAT); \ + } \ + } +#define I(x, y) (x) +VPK(shss, s16, s8, cvtshsb, 1) +VPK(shus, s16, u8, cvtshub, 1) +VPK(swss, s32, s16, cvtswsh, 1) +VPK(swus, s32, u16, cvtswuh, 1) +VPK(uhus, u16, u8, cvtuhub, 1) +VPK(uwus, u32, u16, cvtuwuh, 1) +VPK(uhum, u16, u8, I, 0) +VPK(uwum, u32, u16, I, 0) +#undef I +#undef VPK +#undef PKBIG + +void helper_vrefp (ppc_avr_t *r, ppc_avr_t *b) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->f); i++) { + HANDLE_NAN1(r->f[i], b->f[i]) { + r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status); + } + } +} + +#define VRFI(suffix, rounding) \ + void helper_vrfi##suffix (ppc_avr_t *r, ppc_avr_t *b) \ + { \ + int i; \ + float_status s = env->vec_status; \ + set_float_rounding_mode(rounding, &s); \ + for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ + HANDLE_NAN1(r->f[i], b->f[i]) { \ + r->f[i] = float32_round_to_int (b->f[i], &s); \ + } \ + } \ + } +VRFI(n, float_round_nearest_even) +VRFI(m, float_round_down) +VRFI(p, float_round_up) +VRFI(z, float_round_to_zero) +#undef VRFI + +#define VROTATE(suffix, element) \ + void helper_vrl##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + unsigned int mask = ((1 << (3 + (sizeof (a->element[0]) >> 1))) - 1); \ + unsigned int shift = b->element[i] & mask; \ + r->element[i] = (a->element[i] << shift) | (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ + } \ + } +VROTATE(b, u8) +VROTATE(h, u16) +VROTATE(w, u32) +#undef VROTATE + +void helper_vrsqrtefp (ppc_avr_t *r, ppc_avr_t *b) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->f); i++) { + HANDLE_NAN1(r->f[i], b->f[i]) { + float32 t = float32_sqrt(b->f[i], &env->vec_status); + r->f[i] = float32_div(float32_one, t, &env->vec_status); + } + } +} + +void helper_vsel (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + 
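+    /* Bitwise select: each result bit comes from b where the
+     * corresponding bit of c is set, and from a where it is clear;
+     * the two u64 halves cover the full 128-bit vector.  */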
r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
+    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
+}
+
+void helper_vlogefp (ppc_avr_t *r, ppc_avr_t *b)
+{
+    int i;
+    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
+        HANDLE_NAN1(r->f[i], b->f[i]) {
+            r->f[i] = float32_log2(b->f[i], &env->vec_status);
+        }
+    }
+}
+
+#if defined(WORDS_BIGENDIAN)
+#define LEFT 0
+#define RIGHT 1
+#else
+#define LEFT 1
+#define RIGHT 0
+#endif
+/* The specification says that the results are undefined if all of the
+ * shift counts are not identical.  We check to make sure that they are
+ * identical, to conform to what real hardware appears to do.  */
+#define VSHIFT(suffix, leftp)                                           \
+    void helper_vs##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
+    {                                                                   \
+        int shift = b->u8[LO_IDX*0xf] & 0x7;                            \
+        int doit = 1;                                                   \
+        int i;                                                          \
+        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
+            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
+        }                                                               \
+        if (doit) {                                                     \
+            if (shift == 0) {                                           \
+                *r = *a;                                                \
+            } else if (leftp) {                                         \
+                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
+                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
+                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
+            } else {                                                    \
+                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
+                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
+                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
+            }                                                           \
+        }                                                               \
+    }
+VSHIFT(l, LEFT)
+VSHIFT(r, RIGHT)
+#undef VSHIFT
+#undef LEFT
+#undef RIGHT
+
+#define VSL(suffix, element)                                            \
+    void helper_vsl##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
+    {                                                                   \
+        int i;                                                          \
+        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
+            unsigned int mask = ((1 << (3 + (sizeof (a->element[0]) >> 1))) - 1); \
+            unsigned int shift = b->element[i] & mask;                  \
+            r->element[i] = a->element[i] << shift;                     \
+        }                                                               \
+    }
+VSL(b, u8)
+VSL(h, u16)
+VSL(w, u32)
+#undef VSL
+
+void helper_vsldoi (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
+{
+    int sh = shift & 0xf;
+    int i;
+    ppc_avr_t result;
+
+#if defined(WORDS_BIGENDIAN)
+    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
+        int index = sh + i;
+        if (index > 0xf) {
+            result.u8[i] = b->u8[index-0x10];
+        } else {
+            result.u8[i] = a->u8[index];
+        }
+    }
+#else
+    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
+        int index = (16 - sh) + i;
+        if (index > 0xf) {
+            result.u8[i] = a->u8[index-0x10];
+        } else {
+            result.u8[i] = b->u8[index];
+        }
+    }
+#endif
+    *r = result;
+}
+
+void helper_vslo (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
+
+#if defined (WORDS_BIGENDIAN)
+    memmove (&r->u8[0], &a->u8[sh], 16-sh);
+    memset (&r->u8[16-sh], 0, sh);
+#else
+    memmove (&r->u8[sh], &a->u8[0], 16-sh);
+    memset (&r->u8[0], 0, sh);
+#endif
+}
+
+/* Experimental testing shows that hardware masks the immediate.
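+ * That is, the splat index is reduced modulo the element count, as
+ * _SPLAT_MASKED below implements; for vsplth (eight halfword elements)
+ * an immediate of 9 selects element 9 & 7 = 1.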
*/ +#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1)) +#if defined(WORDS_BIGENDIAN) +#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element) +#else +#define SPLAT_ELEMENT(element) (ARRAY_SIZE(r->element)-1 - _SPLAT_MASKED(element)) +#endif +#define VSPLT(suffix, element) \ + void helper_vsplt##suffix (ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \ + { \ + uint32_t s = b->element[SPLAT_ELEMENT(element)]; \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + r->element[i] = s; \ + } \ + } +VSPLT(b, u8) +VSPLT(h, u16) +VSPLT(w, u32) +#undef VSPLT +#undef SPLAT_ELEMENT +#undef _SPLAT_MASKED + +#define VSPLTI(suffix, element, splat_type) \ + void helper_vspltis##suffix (ppc_avr_t *r, uint32_t splat) \ + { \ + splat_type x = (int8_t)(splat << 3) >> 3; \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + r->element[i] = x; \ + } \ + } +VSPLTI(b, s8, int8_t) +VSPLTI(h, s16, int16_t) +VSPLTI(w, s32, int32_t) +#undef VSPLTI + +#define VSR(suffix, element) \ + void helper_vsr##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + unsigned int mask = ((1 << (3 + (sizeof (a->element[0]) >> 1))) - 1); \ + unsigned int shift = b->element[i] & mask; \ + r->element[i] = a->element[i] >> shift; \ + } \ + } +VSR(ab, s8) +VSR(ah, s16) +VSR(aw, s32) +VSR(b, u8) +VSR(h, u16) +VSR(w, u32) +#undef VSR + +void helper_vsro (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf; + +#if defined (WORDS_BIGENDIAN) + memmove (&r->u8[sh], &a->u8[0], 16-sh); + memset (&r->u8[0], 0, sh); +#else + memmove (&r->u8[0], &a->u8[sh], 16-sh); + memset (&r->u8[16-sh], 0, sh); +#endif +} + +void helper_vsubcuw (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->u32); i++) { + r->u32[i] = a->u32[i] >= b->u32[i]; + } +} + +void helper_vsumsws (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int64_t t; + int i, upper; + ppc_avr_t result; + int sat = 0; + +#if defined(WORDS_BIGENDIAN) + upper = ARRAY_SIZE(r->s32)-1; +#else + upper = 0; +#endif + t = (int64_t)b->s32[upper]; + for (i = 0; i < ARRAY_SIZE(r->s32); i++) { + t += a->s32[i]; + result.s32[i] = 0; + } + result.s32[upper] = cvtsdsw(t, &sat); + *r = result; + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +void helper_vsum2sws (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i, j, upper; + ppc_avr_t result; + int sat = 0; + +#if defined(WORDS_BIGENDIAN) + upper = 1; +#else + upper = 0; +#endif + for (i = 0; i < ARRAY_SIZE(r->u64); i++) { + int64_t t = (int64_t)b->s32[upper+i*2]; + result.u64[i] = 0; + for (j = 0; j < ARRAY_SIZE(r->u64); j++) { + t += a->s32[2*i+j]; + } + result.s32[upper+i*2] = cvtsdsw(t, &sat); + } + + *r = result; + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +void helper_vsum4sbs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i, j; + int sat = 0; + + for (i = 0; i < ARRAY_SIZE(r->s32); i++) { + int64_t t = (int64_t)b->s32[i]; + for (j = 0; j < ARRAY_SIZE(r->s32); j++) { + t += a->s8[4*i+j]; + } + r->s32[i] = cvtsdsw(t, &sat); + } + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +void helper_vsum4shs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int sat = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(r->s32); i++) { + int64_t t = (int64_t)b->s32[i]; + t += a->s16[2*i] + a->s16[2*i+1]; + r->s32[i] = cvtsdsw(t, &sat); + } + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +void helper_vsum4ubs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i, j; + int 
sat = 0; + + for (i = 0; i < ARRAY_SIZE(r->u32); i++) { + uint64_t t = (uint64_t)b->u32[i]; + for (j = 0; j < ARRAY_SIZE(r->u32); j++) { + t += a->u8[4*i+j]; + } + r->u32[i] = cvtuduw(t, &sat); + } + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +#if defined(WORDS_BIGENDIAN) +#define UPKHI 1 +#define UPKLO 0 +#else +#define UPKHI 0 +#define UPKLO 1 +#endif +#define VUPKPX(suffix, hi) \ + void helper_vupk##suffix (ppc_avr_t *r, ppc_avr_t *b) \ + { \ + int i; \ + ppc_avr_t result; \ + for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ + uint16_t e = b->u16[hi ? i : i+4]; \ + uint8_t a = (e >> 15) ? 0xff : 0; \ + uint8_t r = (e >> 10) & 0x1f; \ + uint8_t g = (e >> 5) & 0x1f; \ + uint8_t b = e & 0x1f; \ + result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ + } \ + *r = result; \ + } +VUPKPX(lpx, UPKLO) +VUPKPX(hpx, UPKHI) +#undef VUPKPX + +#define VUPK(suffix, unpacked, packee, hi) \ + void helper_vupk##suffix (ppc_avr_t *r, ppc_avr_t *b) \ + { \ + int i; \ + ppc_avr_t result; \ + if (hi) { \ + for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ + result.unpacked[i] = b->packee[i]; \ + } \ + } else { \ + for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); i++) { \ + result.unpacked[i-ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ + } \ + } \ + *r = result; \ + } +VUPK(hsb, s16, s8, UPKHI) +VUPK(hsh, s32, s16, UPKHI) +VUPK(lsb, s16, s8, UPKLO) +VUPK(lsh, s32, s16, UPKLO) +#undef VUPK +#undef UPKHI +#undef UPKLO + +#undef DO_HANDLE_NAN +#undef HANDLE_NAN1 +#undef HANDLE_NAN2 +#undef HANDLE_NAN3 #undef VECTOR_FOR_INORDER_I #undef HI_IDX #undef LO_IDX @@ -2084,7 +3115,7 @@ static always_inline uint32_t efscfsi (uint32_t val) { CPU_FloatU u; - u.f = int32_to_float32(val, &env->spe_status); + u.f = int32_to_float32(val, &env->vec_status); return u.l; } @@ -2093,7 +3124,7 @@ static always_inline uint32_t efscfui (uint32_t val) { CPU_FloatU u; - u.f = uint32_to_float32(val, &env->spe_status); + u.f = uint32_to_float32(val, &env->vec_status); return u.l; } @@ -2107,7 +3138,7 @@ static always_inline int32_t efsctsi (uint32_t val) if (unlikely(float32_is_nan(u.f))) return 0; - return float32_to_int32(u.f, &env->spe_status); + return float32_to_int32(u.f, &env->vec_status); } static always_inline uint32_t efsctui (uint32_t val) @@ -2119,7 +3150,7 @@ static always_inline uint32_t efsctui (uint32_t val) if (unlikely(float32_is_nan(u.f))) return 0; - return float32_to_uint32(u.f, &env->spe_status); + return float32_to_uint32(u.f, &env->vec_status); } static always_inline uint32_t efsctsiz (uint32_t val) @@ -2131,7 +3162,7 @@ static always_inline uint32_t efsctsiz (uint32_t val) if (unlikely(float32_is_nan(u.f))) return 0; - return float32_to_int32_round_to_zero(u.f, &env->spe_status); + return float32_to_int32_round_to_zero(u.f, &env->vec_status); } static always_inline uint32_t efsctuiz (uint32_t val) @@ -2143,7 +3174,7 @@ static always_inline uint32_t efsctuiz (uint32_t val) if (unlikely(float32_is_nan(u.f))) return 0; - return float32_to_uint32_round_to_zero(u.f, &env->spe_status); + return float32_to_uint32_round_to_zero(u.f, &env->vec_status); } static always_inline uint32_t efscfsf (uint32_t val) @@ -2151,9 +3182,9 @@ static always_inline uint32_t efscfsf (uint32_t val) CPU_FloatU u; float32 tmp; - u.f = int32_to_float32(val, &env->spe_status); - tmp = int64_to_float32(1ULL << 32, &env->spe_status); - u.f = float32_div(u.f, tmp, &env->spe_status); + u.f = int32_to_float32(val, &env->vec_status); + tmp = int64_to_float32(1ULL << 32, &env->vec_status); + u.f = float32_div(u.f, tmp, 
&env->vec_status); return u.l; } @@ -2163,9 +3194,9 @@ static always_inline uint32_t efscfuf (uint32_t val) CPU_FloatU u; float32 tmp; - u.f = uint32_to_float32(val, &env->spe_status); - tmp = uint64_to_float32(1ULL << 32, &env->spe_status); - u.f = float32_div(u.f, tmp, &env->spe_status); + u.f = uint32_to_float32(val, &env->vec_status); + tmp = uint64_to_float32(1ULL << 32, &env->vec_status); + u.f = float32_div(u.f, tmp, &env->vec_status); return u.l; } @@ -2179,10 +3210,10 @@ static always_inline uint32_t efsctsf (uint32_t val) /* NaN are not treated the same way IEEE 754 does */ if (unlikely(float32_is_nan(u.f))) return 0; - tmp = uint64_to_float32(1ULL << 32, &env->spe_status); - u.f = float32_mul(u.f, tmp, &env->spe_status); + tmp = uint64_to_float32(1ULL << 32, &env->vec_status); + u.f = float32_mul(u.f, tmp, &env->vec_status); - return float32_to_int32(u.f, &env->spe_status); + return float32_to_int32(u.f, &env->vec_status); } static always_inline uint32_t efsctuf (uint32_t val) @@ -2194,10 +3225,10 @@ static always_inline uint32_t efsctuf (uint32_t val) /* NaN are not treated the same way IEEE 754 does */ if (unlikely(float32_is_nan(u.f))) return 0; - tmp = uint64_to_float32(1ULL << 32, &env->spe_status); - u.f = float32_mul(u.f, tmp, &env->spe_status); + tmp = uint64_to_float32(1ULL << 32, &env->vec_status); + u.f = float32_mul(u.f, tmp, &env->vec_status); - return float32_to_uint32(u.f, &env->spe_status); + return float32_to_uint32(u.f, &env->vec_status); } #define HELPER_SPE_SINGLE_CONV(name) \ @@ -2259,7 +3290,7 @@ static always_inline uint32_t efsadd (uint32_t op1, uint32_t op2) CPU_FloatU u1, u2; u1.l = op1; u2.l = op2; - u1.f = float32_add(u1.f, u2.f, &env->spe_status); + u1.f = float32_add(u1.f, u2.f, &env->vec_status); return u1.l; } @@ -2268,7 +3299,7 @@ static always_inline uint32_t efssub (uint32_t op1, uint32_t op2) CPU_FloatU u1, u2; u1.l = op1; u2.l = op2; - u1.f = float32_sub(u1.f, u2.f, &env->spe_status); + u1.f = float32_sub(u1.f, u2.f, &env->vec_status); return u1.l; } @@ -2277,7 +3308,7 @@ static always_inline uint32_t efsmul (uint32_t op1, uint32_t op2) CPU_FloatU u1, u2; u1.l = op1; u2.l = op2; - u1.f = float32_mul(u1.f, u2.f, &env->spe_status); + u1.f = float32_mul(u1.f, u2.f, &env->vec_status); return u1.l; } @@ -2286,7 +3317,7 @@ static always_inline uint32_t efsdiv (uint32_t op1, uint32_t op2) CPU_FloatU u1, u2; u1.l = op1; u2.l = op2; - u1.f = float32_div(u1.f, u2.f, &env->spe_status); + u1.f = float32_div(u1.f, u2.f, &env->vec_status); return u1.l; } @@ -2325,7 +3356,7 @@ static always_inline uint32_t efststlt (uint32_t op1, uint32_t op2) CPU_FloatU u1, u2; u1.l = op1; u2.l = op2; - return float32_lt(u1.f, u2.f, &env->spe_status) ? 4 : 0; + return float32_lt(u1.f, u2.f, &env->vec_status) ? 4 : 0; } static always_inline uint32_t efststgt (uint32_t op1, uint32_t op2) @@ -2333,7 +3364,7 @@ static always_inline uint32_t efststgt (uint32_t op1, uint32_t op2) CPU_FloatU u1, u2; u1.l = op1; u2.l = op2; - return float32_le(u1.f, u2.f, &env->spe_status) ? 0 : 4; + return float32_le(u1.f, u2.f, &env->vec_status) ? 0 : 4; } static always_inline uint32_t efststeq (uint32_t op1, uint32_t op2) @@ -2341,7 +3372,7 @@ static always_inline uint32_t efststeq (uint32_t op1, uint32_t op2) CPU_FloatU u1, u2; u1.l = op1; u2.l = op2; - return float32_eq(u1.f, u2.f, &env->spe_status) ? 4 : 0; + return float32_eq(u1.f, u2.f, &env->vec_status) ? 
4 : 0; } static always_inline uint32_t efscmplt (uint32_t op1, uint32_t op2) @@ -2408,7 +3439,7 @@ uint64_t helper_efdcfsi (uint32_t val) { CPU_DoubleU u; - u.d = int32_to_float64(val, &env->spe_status); + u.d = int32_to_float64(val, &env->vec_status); return u.ll; } @@ -2417,7 +3448,7 @@ uint64_t helper_efdcfsid (uint64_t val) { CPU_DoubleU u; - u.d = int64_to_float64(val, &env->spe_status); + u.d = int64_to_float64(val, &env->vec_status); return u.ll; } @@ -2426,7 +3457,7 @@ uint64_t helper_efdcfui (uint32_t val) { CPU_DoubleU u; - u.d = uint32_to_float64(val, &env->spe_status); + u.d = uint32_to_float64(val, &env->vec_status); return u.ll; } @@ -2435,7 +3466,7 @@ uint64_t helper_efdcfuid (uint64_t val) { CPU_DoubleU u; - u.d = uint64_to_float64(val, &env->spe_status); + u.d = uint64_to_float64(val, &env->vec_status); return u.ll; } @@ -2449,7 +3480,7 @@ uint32_t helper_efdctsi (uint64_t val) if (unlikely(float64_is_nan(u.d))) return 0; - return float64_to_int32(u.d, &env->spe_status); + return float64_to_int32(u.d, &env->vec_status); } uint32_t helper_efdctui (uint64_t val) @@ -2461,7 +3492,7 @@ uint32_t helper_efdctui (uint64_t val) if (unlikely(float64_is_nan(u.d))) return 0; - return float64_to_uint32(u.d, &env->spe_status); + return float64_to_uint32(u.d, &env->vec_status); } uint32_t helper_efdctsiz (uint64_t val) @@ -2473,7 +3504,7 @@ uint32_t helper_efdctsiz (uint64_t val) if (unlikely(float64_is_nan(u.d))) return 0; - return float64_to_int32_round_to_zero(u.d, &env->spe_status); + return float64_to_int32_round_to_zero(u.d, &env->vec_status); } uint64_t helper_efdctsidz (uint64_t val) @@ -2485,7 +3516,7 @@ uint64_t helper_efdctsidz (uint64_t val) if (unlikely(float64_is_nan(u.d))) return 0; - return float64_to_int64_round_to_zero(u.d, &env->spe_status); + return float64_to_int64_round_to_zero(u.d, &env->vec_status); } uint32_t helper_efdctuiz (uint64_t val) @@ -2497,7 +3528,7 @@ uint32_t helper_efdctuiz (uint64_t val) if (unlikely(float64_is_nan(u.d))) return 0; - return float64_to_uint32_round_to_zero(u.d, &env->spe_status); + return float64_to_uint32_round_to_zero(u.d, &env->vec_status); } uint64_t helper_efdctuidz (uint64_t val) @@ -2509,7 +3540,7 @@ uint64_t helper_efdctuidz (uint64_t val) if (unlikely(float64_is_nan(u.d))) return 0; - return float64_to_uint64_round_to_zero(u.d, &env->spe_status); + return float64_to_uint64_round_to_zero(u.d, &env->vec_status); } uint64_t helper_efdcfsf (uint32_t val) @@ -2517,9 +3548,9 @@ uint64_t helper_efdcfsf (uint32_t val) CPU_DoubleU u; float64 tmp; - u.d = int32_to_float64(val, &env->spe_status); - tmp = int64_to_float64(1ULL << 32, &env->spe_status); - u.d = float64_div(u.d, tmp, &env->spe_status); + u.d = int32_to_float64(val, &env->vec_status); + tmp = int64_to_float64(1ULL << 32, &env->vec_status); + u.d = float64_div(u.d, tmp, &env->vec_status); return u.ll; } @@ -2529,9 +3560,9 @@ uint64_t helper_efdcfuf (uint32_t val) CPU_DoubleU u; float64 tmp; - u.d = uint32_to_float64(val, &env->spe_status); - tmp = int64_to_float64(1ULL << 32, &env->spe_status); - u.d = float64_div(u.d, tmp, &env->spe_status); + u.d = uint32_to_float64(val, &env->vec_status); + tmp = int64_to_float64(1ULL << 32, &env->vec_status); + u.d = float64_div(u.d, tmp, &env->vec_status); return u.ll; } @@ -2545,10 +3576,10 @@ uint32_t helper_efdctsf (uint64_t val) /* NaN are not treated the same way IEEE 754 does */ if (unlikely(float64_is_nan(u.d))) return 0; - tmp = uint64_to_float64(1ULL << 32, &env->spe_status); - u.d = float64_mul(u.d, tmp, &env->spe_status); + 
tmp = uint64_to_float64(1ULL << 32, &env->vec_status); + u.d = float64_mul(u.d, tmp, &env->vec_status); - return float64_to_int32(u.d, &env->spe_status); + return float64_to_int32(u.d, &env->vec_status); } uint32_t helper_efdctuf (uint64_t val) @@ -2560,10 +3591,10 @@ uint32_t helper_efdctuf (uint64_t val) /* NaN are not treated the same way IEEE 754 does */ if (unlikely(float64_is_nan(u.d))) return 0; - tmp = uint64_to_float64(1ULL << 32, &env->spe_status); - u.d = float64_mul(u.d, tmp, &env->spe_status); + tmp = uint64_to_float64(1ULL << 32, &env->vec_status); + u.d = float64_mul(u.d, tmp, &env->vec_status); - return float64_to_uint32(u.d, &env->spe_status); + return float64_to_uint32(u.d, &env->vec_status); } uint32_t helper_efscfd (uint64_t val) @@ -2572,7 +3603,7 @@ uint32_t helper_efscfd (uint64_t val) CPU_FloatU u2; u1.ll = val; - u2.f = float64_to_float32(u1.d, &env->spe_status); + u2.f = float64_to_float32(u1.d, &env->vec_status); return u2.l; } @@ -2583,7 +3614,7 @@ uint64_t helper_efdcfs (uint32_t val) CPU_FloatU u1; u1.l = val; - u2.d = float32_to_float64(u1.f, &env->spe_status); + u2.d = float32_to_float64(u1.f, &env->vec_status); return u2.ll; } @@ -2594,7 +3625,7 @@ uint64_t helper_efdadd (uint64_t op1, uint64_t op2) CPU_DoubleU u1, u2; u1.ll = op1; u2.ll = op2; - u1.d = float64_add(u1.d, u2.d, &env->spe_status); + u1.d = float64_add(u1.d, u2.d, &env->vec_status); return u1.ll; } @@ -2603,7 +3634,7 @@ uint64_t helper_efdsub (uint64_t op1, uint64_t op2) CPU_DoubleU u1, u2; u1.ll = op1; u2.ll = op2; - u1.d = float64_sub(u1.d, u2.d, &env->spe_status); + u1.d = float64_sub(u1.d, u2.d, &env->vec_status); return u1.ll; } @@ -2612,7 +3643,7 @@ uint64_t helper_efdmul (uint64_t op1, uint64_t op2) CPU_DoubleU u1, u2; u1.ll = op1; u2.ll = op2; - u1.d = float64_mul(u1.d, u2.d, &env->spe_status); + u1.d = float64_mul(u1.d, u2.d, &env->vec_status); return u1.ll; } @@ -2621,7 +3652,7 @@ uint64_t helper_efddiv (uint64_t op1, uint64_t op2) CPU_DoubleU u1, u2; u1.ll = op1; u2.ll = op2; - u1.d = float64_div(u1.d, u2.d, &env->spe_status); + u1.d = float64_div(u1.d, u2.d, &env->vec_status); return u1.ll; } @@ -2631,7 +3662,7 @@ uint32_t helper_efdtstlt (uint64_t op1, uint64_t op2) CPU_DoubleU u1, u2; u1.ll = op1; u2.ll = op2; - return float64_lt(u1.d, u2.d, &env->spe_status) ? 4 : 0; + return float64_lt(u1.d, u2.d, &env->vec_status) ? 4 : 0; } uint32_t helper_efdtstgt (uint64_t op1, uint64_t op2) @@ -2639,7 +3670,7 @@ uint32_t helper_efdtstgt (uint64_t op1, uint64_t op2) CPU_DoubleU u1, u2; u1.ll = op1; u2.ll = op2; - return float64_le(u1.d, u2.d, &env->spe_status) ? 0 : 4; + return float64_le(u1.d, u2.d, &env->vec_status) ? 0 : 4; } uint32_t helper_efdtsteq (uint64_t op1, uint64_t op2) @@ -2647,7 +3678,7 @@ uint32_t helper_efdtsteq (uint64_t op1, uint64_t op2) CPU_DoubleU u1, u2; u1.ll = op1; u2.ll = op2; - return float64_eq(u1.d, u2.d, &env->spe_status) ? 4 : 0; + return float64_eq(u1.d, u2.d, &env->vec_status) ? 
4 : 0; } uint32_t helper_efdcmplt (uint64_t op1, uint64_t op2) @@ -2780,13 +3811,9 @@ static void do_6xx_tlb (target_ulong new_EPN, int is_code) EPN = env->spr[SPR_DMISS]; } way = (env->spr[SPR_SRR1] >> 17) & 1; -#if defined (DEBUG_SOFTWARE_TLB) - if (loglevel != 0) { - fprintf(logfile, "%s: EPN " ADDRX " " ADDRX " PTE0 " ADDRX + LOG_SWTLB("%s: EPN " ADDRX " " ADDRX " PTE0 " ADDRX " PTE1 " ADDRX " way %d\n", __func__, new_EPN, EPN, CMP, RPN, way); - } -#endif /* Store this TLB */ ppc6xx_tlb_store(env, (uint32_t)(new_EPN & TARGET_PAGE_MASK), way, is_code, CMP, RPN); @@ -2812,13 +3839,9 @@ static void do_74xx_tlb (target_ulong new_EPN, int is_code) CMP = env->spr[SPR_PTEHI]; EPN = env->spr[SPR_TLBMISS] & ~0x3; way = env->spr[SPR_TLBMISS] & 0x3; -#if defined (DEBUG_SOFTWARE_TLB) - if (loglevel != 0) { - fprintf(logfile, "%s: EPN " ADDRX " " ADDRX " PTE0 " ADDRX + LOG_SWTLB("%s: EPN " ADDRX " " ADDRX " PTE0 " ADDRX " PTE1 " ADDRX " way %d\n", __func__, new_EPN, EPN, CMP, RPN, way); - } -#endif /* Store this TLB */ ppc6xx_tlb_store(env, (uint32_t)(new_EPN & TARGET_PAGE_MASK), way, is_code, CMP, RPN); @@ -2942,22 +3965,14 @@ void helper_4xx_tlbwe_hi (target_ulong entry, target_ulong val) ppcemb_tlb_t *tlb; target_ulong page, end; -#if defined (DEBUG_SOFTWARE_TLB) - if (loglevel != 0) { - fprintf(logfile, "%s entry %d val " ADDRX "\n", __func__, (int)entry, val); - } -#endif + LOG_SWTLB("%s entry %d val " ADDRX "\n", __func__, (int)entry, val); entry &= 0x3F; tlb = &env->tlb[entry].tlbe; /* Invalidate previous TLB (if it's valid) */ if (tlb->prot & PAGE_VALID) { end = tlb->EPN + tlb->size; -#if defined (DEBUG_SOFTWARE_TLB) - if (loglevel != 0) { - fprintf(logfile, "%s: invalidate old TLB %d start " ADDRX + LOG_SWTLB("%s: invalidate old TLB %d start " ADDRX " end " ADDRX "\n", __func__, (int)entry, tlb->EPN, end); - } -#endif for (page = tlb->EPN; page < end; page += TARGET_PAGE_SIZE) tlb_flush_page(env, page); } @@ -2982,26 +3997,18 @@ void helper_4xx_tlbwe_hi (target_ulong entry, target_ulong val) } tlb->PID = env->spr[SPR_40x_PID]; /* PID */ tlb->attr = val & 0xFF; -#if defined (DEBUG_SOFTWARE_TLB) - if (loglevel != 0) { - fprintf(logfile, "%s: set up TLB %d RPN " PADDRX " EPN " ADDRX + LOG_SWTLB("%s: set up TLB %d RPN " PADDRX " EPN " ADDRX " size " ADDRX " prot %c%c%c%c PID %d\n", __func__, (int)entry, tlb->RPN, tlb->EPN, tlb->size, tlb->prot & PAGE_READ ? 'r' : '-', tlb->prot & PAGE_WRITE ? 'w' : '-', tlb->prot & PAGE_EXEC ? 'x' : '-', tlb->prot & PAGE_VALID ? 
'v' : '-', (int)tlb->PID); - } -#endif /* Invalidate new TLB (if valid) */ if (tlb->prot & PAGE_VALID) { end = tlb->EPN + tlb->size; -#if defined (DEBUG_SOFTWARE_TLB) - if (loglevel != 0) { - fprintf(logfile, "%s: invalidate TLB %d start " ADDRX + LOG_SWTLB("%s: invalidate TLB %d start " ADDRX " end " ADDRX "\n", __func__, (int)entry, tlb->EPN, end); - } -#endif for (page = tlb->EPN; page < end; page += TARGET_PAGE_SIZE) tlb_flush_page(env, page); } @@ -3011,11 +4018,7 @@ void helper_4xx_tlbwe_lo (target_ulong entry, target_ulong val) { ppcemb_tlb_t *tlb; -#if defined (DEBUG_SOFTWARE_TLB) - if (loglevel != 0) { - fprintf(logfile, "%s entry %i val " ADDRX "\n", __func__, (int)entry, val); - } -#endif + LOG_SWTLB("%s entry %i val " ADDRX "\n", __func__, (int)entry, val); entry &= 0x3F; tlb = &env->tlb[entry].tlbe; tlb->RPN = val & 0xFFFFFC00; @@ -3024,17 +4027,13 @@ void helper_4xx_tlbwe_lo (target_ulong entry, target_ulong val) tlb->prot |= PAGE_EXEC; if (val & 0x100) tlb->prot |= PAGE_WRITE; -#if defined (DEBUG_SOFTWARE_TLB) - if (loglevel != 0) { - fprintf(logfile, "%s: set up TLB %d RPN " PADDRX " EPN " ADDRX + LOG_SWTLB("%s: set up TLB %d RPN " PADDRX " EPN " ADDRX " size " ADDRX " prot %c%c%c%c PID %d\n", __func__, (int)entry, tlb->RPN, tlb->EPN, tlb->size, tlb->prot & PAGE_READ ? 'r' : '-', tlb->prot & PAGE_WRITE ? 'w' : '-', tlb->prot & PAGE_EXEC ? 'x' : '-', tlb->prot & PAGE_VALID ? 'v' : '-', (int)tlb->PID); - } -#endif } target_ulong helper_4xx_tlbsx (target_ulong address) @@ -3049,12 +4048,8 @@ void helper_440_tlbwe (uint32_t word, target_ulong entry, target_ulong value) target_ulong EPN, RPN, size; int do_flush_tlbs; -#if defined (DEBUG_SOFTWARE_TLB) - if (loglevel != 0) { - fprintf(logfile, "%s word %d entry %d value " ADDRX "\n", + LOG_SWTLB("%s word %d entry %d value " ADDRX "\n", __func__, word, (int)entry, value); - } -#endif do_flush_tlbs = 0; entry &= 0x3F; tlb = &env->tlb[entry].tlbe;