Merge tag 's390x-tcg-2019-05-17-2' into s390-next-staging

Implement all Vector Integer Instructions introduced with the
"Vector Facility" for s390x TCG.

# gpg: Signature made Fri 17 May 2019 01:37:40 PM CEST
# gpg:                using RSA key 4DDE10F700FF835A
# gpg: Good signature from "David Hildenbrand <david@redhat.com>" [full]
# gpg:                 aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full]

* tag 's390x-tcg-2019-05-17-2': (40 commits)
  s390x/tcg: Implement VECTOR TEST UNDER MASK
  s390x/tcg: Implement VECTOR SUM ACROSS WORD
  s390x/tcg: Implement VECTOR SUM ACROSS QUADWORD
  s390x/tcg: Implement VECTOR SUM ACROSS DOUBLEWORD
  s390x/tcg: Implement VECTOR SUBTRACT WITH BORROW COMPUTE BORROW INDICATION
  s390x/tcg: Implement VECTOR SUBTRACT WITH BORROW INDICATION
  s390x/tcg: Implement VECTOR SUBTRACT COMPUTE BORROW INDICATION
  s390x/tcg: Implement VECTOR SUBTRACT
  s390x/tcg: Implement VECTOR SHIFT RIGHT LOGICAL *
  s390x/tcg: Implement VECTOR SHIFT RIGHT ARITHMETIC
  s390x/tcg: Implement VECTOR SHIFT LEFT DOUBLE BY BYTE
  s390x/tcg: Implement VECTOR SHIFT LEFT (BY BYTE)
  s390x/tcg: Implement VECTOR ELEMENT SHIFT
  s390x/tcg: Implement VECTOR ELEMENT ROTATE AND INSERT UNDER MASK
  s390x/tcg: Implement VECTOR ELEMENT ROTATE LEFT LOGICAL
  s390x/tcg: Implement VECTOR POPULATION COUNT
  s390x/tcg: Implement VECTOR OR WITH COMPLEMENT
  s390x/tcg: Implement VECTOR OR
  s390x/tcg: Implement VECTOR NOT EXCLUSIVE OR
  s390x/tcg: Implement VECTOR NOR
  ...

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Commit e85decf2f1 by Cornelia Huck, contained in tags/v4.1.0-rc0
target/s390x/Makefile.objs (+1/-1)

@@ -1,7 +1,7 @@
obj-y += cpu.o cpu_models.o cpu_features.o gdbstub.o interrupt.o helper.o
obj-$(CONFIG_TCG) += translate.o cc_helper.o excp_helper.o fpu_helper.o
obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o crypto_helper.o
obj-$(CONFIG_TCG) += vec_helper.o
obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o
obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o mmu_helper.o diag.o
obj-$(CONFIG_SOFTMMU) += sigp.o
obj-$(CONFIG_KVM) += kvm.o

target/s390x/cc_helper.c (+17/-0)

@@ -402,6 +402,20 @@ static uint32_t cc_calc_lcbb(uint64_t dst)
return dst == 16 ? 0 : 3;
}

static uint32_t cc_calc_vc(uint64_t low, uint64_t high)
{
if (high == -1ull && low == -1ull) {
/* all elements match */
return 0;
} else if (high == 0 && low == 0) {
/* no elements match */
return 3;
} else {
/* some elements but not all match */
return 1;
}
}

static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
uint64_t src, uint64_t dst, uint64_t vr)
{
@@ -514,6 +528,9 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_LCBB:
r = cc_calc_lcbb(dst);
break;
case CC_OP_VC:
r = cc_calc_vc(src, dst);
break;

case CC_OP_NZ_F32:
r = set_cc_nz_f32(dst);

target/s390x/helper.c (+1/-0)

@@ -418,6 +418,7 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_SLA_64] = "CC_OP_SLA_64",
[CC_OP_FLOGR] = "CC_OP_FLOGR",
[CC_OP_LCBB] = "CC_OP_LCBB",
[CC_OP_VC] = "CC_OP_VC",
};

return cc_names[cc_op];

target/s390x/helper.h (+66/-0)

@@ -145,6 +145,72 @@ DEF_HELPER_5(gvec_vpkls_cc64, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vperm, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(vstl, TCG_CALL_NO_WG, void, env, cptr, i64, i64)

/* === Vector Integer Instructions === */
DEF_HELPER_FLAGS_4(gvec_vavg8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vavg16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vavgl8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vavgl16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_3(gvec_vclz8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
DEF_HELPER_FLAGS_3(gvec_vclz16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
DEF_HELPER_FLAGS_3(gvec_vctz8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
DEF_HELPER_FLAGS_3(gvec_vctz16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vgfm8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vgfm16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vgfm32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vgfm64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vgfma8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vgfma16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vgfma32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vgfma64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmal8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmal16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmah8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmah16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmalh8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmalh16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmae8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmae16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmae32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmale8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmale16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmale32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmao8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmao16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmao32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmalo8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmalo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vmalo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmh8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmh16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmlh8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmlh16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vme8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vme16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vme32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmle8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmle16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmle32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmo8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmlo8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmlo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmlo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_3(gvec_vpopct8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_verllv8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_verllv16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_verll8, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_verll16, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_vsra, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_vsrl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)

#ifndef CONFIG_USER_ONLY
DEF_HELPER_3(servc, i32, env, i64, i64)
DEF_HELPER_4(diag, void, env, i32, i32, i32)

target/s390x/insn-data.def (+137/-0)

@@ -1054,6 +1054,143 @@
/* VECTOR UNPACK LOGICAL LOW */
F(0xe7d4, VUPLL, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)

/* === Vector Integer Instructions === */

/* VECTOR ADD */
F(0xe7f3, VA, VRR_c, V, 0, 0, 0, 0, va, 0, IF_VEC)
/* VECTOR ADD COMPUTE CARRY */
F(0xe7f1, VACC, VRR_c, V, 0, 0, 0, 0, vacc, 0, IF_VEC)
/* VECTOR ADD WITH CARRY */
F(0xe7bb, VAC, VRR_d, V, 0, 0, 0, 0, vac, 0, IF_VEC)
/* VECTOR ADD WITH CARRY COMPUTE CARRY */
F(0xe7b9, VACCC, VRR_d, V, 0, 0, 0, 0, vaccc, 0, IF_VEC)
/* VECTOR AND */
F(0xe768, VN, VRR_c, V, 0, 0, 0, 0, vn, 0, IF_VEC)
/* VECTOR AND WITH COMPLEMENT */
F(0xe769, VNC, VRR_c, V, 0, 0, 0, 0, vnc, 0, IF_VEC)
/* VECTOR AVERAGE */
F(0xe7f2, VAVG, VRR_c, V, 0, 0, 0, 0, vavg, 0, IF_VEC)
/* VECTOR AVERAGE LOGICAL */
F(0xe7f0, VAVGL, VRR_c, V, 0, 0, 0, 0, vavgl, 0, IF_VEC)
/* VECTOR CHECKSUM */
F(0xe766, VCKSM, VRR_c, V, 0, 0, 0, 0, vcksm, 0, IF_VEC)
/* VECTOR ELEMENT COMPARE */
F(0xe7db, VEC, VRR_a, V, 0, 0, 0, 0, vec, cmps64, IF_VEC)
/* VECTOR ELEMENT COMPARE LOGICAL */
F(0xe7d9, VECL, VRR_a, V, 0, 0, 0, 0, vec, cmpu64, IF_VEC)
/* VECTOR COMPARE EQUAL */
E(0xe7f8, VCEQ, VRR_b, V, 0, 0, 0, 0, vc, 0, TCG_COND_EQ, IF_VEC)
/* VECTOR COMPARE HIGH */
E(0xe7fb, VCH, VRR_b, V, 0, 0, 0, 0, vc, 0, TCG_COND_GT, IF_VEC)
/* VECTOR COMPARE HIGH LOGICAL */
E(0xe7f9, VCHL, VRR_b, V, 0, 0, 0, 0, vc, 0, TCG_COND_GTU, IF_VEC)
/* VECTOR COUNT LEADING ZEROS */
F(0xe753, VCLZ, VRR_a, V, 0, 0, 0, 0, vclz, 0, IF_VEC)
/* VECTOR COUNT TRAILING ZEROS */
F(0xe752, VCTZ, VRR_a, V, 0, 0, 0, 0, vctz, 0, IF_VEC)
/* VECTOR EXCLUSIVE OR */
F(0xe76d, VX, VRR_c, V, 0, 0, 0, 0, vx, 0, IF_VEC)
/* VECTOR GALOIS FIELD MULTIPLY SUM */
F(0xe7b4, VGFM, VRR_c, V, 0, 0, 0, 0, vgfm, 0, IF_VEC)
/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
F(0xe7bc, VGFMA, VRR_d, V, 0, 0, 0, 0, vgfma, 0, IF_VEC)
/* VECTOR LOAD COMPLEMENT */
F(0xe7de, VLC, VRR_a, V, 0, 0, 0, 0, vlc, 0, IF_VEC)
/* VECTOR LOAD POSITIVE */
F(0xe7df, VLP, VRR_a, V, 0, 0, 0, 0, vlp, 0, IF_VEC)
/* VECTOR MAXIMUM */
F(0xe7ff, VMX, VRR_c, V, 0, 0, 0, 0, vmx, 0, IF_VEC)
/* VECTOR MAXIMUM LOGICAL */
F(0xe7fd, VMXL, VRR_c, V, 0, 0, 0, 0, vmx, 0, IF_VEC)
/* VECTOR MINIMUM */
F(0xe7fe, VMN, VRR_c, V, 0, 0, 0, 0, vmx, 0, IF_VEC)
/* VECTOR MINIMUM LOGICAL */
F(0xe7fc, VMNL, VRR_c, V, 0, 0, 0, 0, vmx, 0, IF_VEC)
/* VECTOR MULTIPLY AND ADD LOW */
F(0xe7aa, VMAL, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC)
/* VECTOR MULTIPLY AND ADD HIGH */
F(0xe7ab, VMAH, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC)
/* VECTOR MULTIPLY AND ADD LOGICAL HIGH */
F(0xe7a9, VMALH, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC)
/* VECTOR MULTIPLY AND ADD EVEN */
F(0xe7ae, VMAE, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC)
/* VECTOR MULTIPLY AND ADD LOGICAL EVEN */
F(0xe7ac, VMALE, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC)
/* VECTOR MULTIPLY AND ADD ODD */
F(0xe7af, VMAO, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC)
/* VECTOR MULTIPLY AND ADD LOGICAL ODD */
F(0xe7ad, VMALO, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC)
/* VECTOR MULTIPLY HIGH */
F(0xe7a3, VMH, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC)
/* VECTOR MULTIPLY LOGICAL HIGH */
F(0xe7a1, VMLH, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC)
/* VECTOR MULTIPLY LOW */
F(0xe7a2, VML, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC)
/* VECTOR MULTIPLY EVEN */
F(0xe7a6, VME, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC)
/* VECTOR MULTIPLY LOGICAL EVEN */
F(0xe7a4, VMLE, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC)
/* VECTOR MULTIPLY ODD */
F(0xe7a7, VMO, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC)
/* VECTOR MULTIPLY LOGICAL ODD */
F(0xe7a5, VMLO, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC)
/* VECTOR NAND */
F(0xe76e, VNN, VRR_c, VE, 0, 0, 0, 0, vnn, 0, IF_VEC)
/* VECTOR NOR */
F(0xe76b, VNO, VRR_c, V, 0, 0, 0, 0, vno, 0, IF_VEC)
/* VECTOR NOT EXCLUSIVE OR */
F(0xe76c, VNX, VRR_c, VE, 0, 0, 0, 0, vnx, 0, IF_VEC)
/* VECTOR OR */
F(0xe76a, VO, VRR_c, V, 0, 0, 0, 0, vo, 0, IF_VEC)
/* VECTOR OR WITH COMPLEMENT */
F(0xe76f, VOC, VRR_c, VE, 0, 0, 0, 0, voc, 0, IF_VEC)
/* VECTOR POPULATION COUNT */
F(0xe750, VPOPCT, VRR_a, V, 0, 0, 0, 0, vpopct, 0, IF_VEC)
/* VECTOR ELEMENT ROTATE LEFT LOGICAL */
F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, verllv, 0, IF_VEC)
F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, verll, 0, IF_VEC)
/* VECTOR ELEMENT ROTATE AND INSERT UNDER MASK */
F(0xe772, VERIM, VRI_d, V, 0, 0, 0, 0, verim, 0, IF_VEC)
/* VECTOR ELEMENT SHIFT LEFT */
F(0xe770, VESLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC)
F(0xe730, VESL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC)
/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */
F(0xe77a, VESRAV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC)
F(0xe73a, VESRA, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC)
/* VECTOR ELEMENT SHIFT RIGHT LOGICAL */
F(0xe778, VESRLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC)
F(0xe738, VESRL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC)
/* VECTOR SHIFT LEFT */
F(0xe774, VSL, VRR_c, V, 0, 0, 0, 0, vsl, 0, IF_VEC)
/* VECTOR SHIFT LEFT BY BYTE */
F(0xe775, VSLB, VRR_c, V, 0, 0, 0, 0, vsl, 0, IF_VEC)
/* VECTOR SHIFT LEFT DOUBLE BY BYTE */
F(0xe777, VSLDB, VRI_d, V, 0, 0, 0, 0, vsldb, 0, IF_VEC)
/* VECTOR SHIFT RIGHT ARITHMETIC */
F(0xe77e, VSRA, VRR_c, V, 0, 0, 0, 0, vsra, 0, IF_VEC)
/* VECTOR SHIFT RIGHT ARITHMETIC BY BYTE */
F(0xe77f, VSRAB, VRR_c, V, 0, 0, 0, 0, vsra, 0, IF_VEC)
/* VECTOR SHIFT RIGHT LOGICAL */
F(0xe77c, VSRL, VRR_c, V, 0, 0, 0, 0, vsrl, 0, IF_VEC)
/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
F(0xe77d, VSRLB, VRR_c, V, 0, 0, 0, 0, vsrl, 0, IF_VEC)
/* VECTOR SUBTRACT */
F(0xe7f7, VS, VRR_c, V, 0, 0, 0, 0, vs, 0, IF_VEC)
/* VECTOR SUBTRACT COMPUTE BORROW INDICATION */
F(0xe7f5, VSCBI, VRR_c, V, 0, 0, 0, 0, vscbi, 0, IF_VEC)
/* VECTOR SUBTRACT WITH BORROW INDICATION */
F(0xe7bf, VSBI, VRR_d, V, 0, 0, 0, 0, vsbi, 0, IF_VEC)
/* VECTOR SUBTRACT WITH BORROW COMPUTE BORROW INDICATION */
F(0xe7bd, VSBCBI, VRR_d, V, 0, 0, 0, 0, vsbcbi, 0, IF_VEC)
/* VECTOR SUM ACROSS DOUBLEWORD */
F(0xe765, VSUMG, VRR_c, V, 0, 0, 0, 0, vsumg, 0, IF_VEC)
/* VECTOR SUM ACROSS QUADWORD */
F(0xe767, VSUMQ, VRR_c, V, 0, 0, 0, 0, vsumq, 0, IF_VEC)
/* VECTOR SUM ACROSS WORD */
F(0xe764, VSUM, VRR_c, V, 0, 0, 0, 0, vsum, 0, IF_VEC)
/* VECTOR TEST UNDER MASK */
F(0xe7d8, VTM, VRR_a, V, 0, 0, 0, 0, vtm, 0, IF_VEC)

#ifndef CONFIG_USER_ONLY
/* COMPARE AND SWAP AND PURGE */
E(0xb250, CSP, RRE, Z, r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV)

target/s390x/internal.h (+1/-0)

@@ -200,6 +200,7 @@ enum cc_op {
CC_OP_SLA_64, /* Calculate shift left signed (64bit) */
CC_OP_FLOGR, /* find leftmost one */
CC_OP_LCBB, /* load count to block boundary */
CC_OP_VC, /* vector compare result */
CC_OP_MAX
};


target/s390x/translate.c (+2/-0)

@@ -572,6 +572,7 @@ static void gen_op_calc_cc(DisasContext *s)
case CC_OP_SLA_32:
case CC_OP_SLA_64:
case CC_OP_NZ_F128:
case CC_OP_VC:
/* 2 arguments */
gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, dummy);
break;
@@ -6092,6 +6093,7 @@ enum DisasInsnEnum {
#define FAC_PCI S390_FEAT_ZPCI /* z/PCI facility */
#define FAC_AIS S390_FEAT_ADAPTER_INT_SUPPRESSION
#define FAC_V S390_FEAT_VECTOR /* vector facility */
#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 */

static const DisasInsn insn_info[] = {
#include "insn-data.def"

target/s390x/translate_vx.inc.c (+1420/-0)

@@ -90,6 +90,33 @@ static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
}
}

static void read_vec_element_i32(TCGv_i32 dst, uint8_t reg, uint8_t enr,
TCGMemOp memop)
{
const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

switch (memop) {
case ES_8:
tcg_gen_ld8u_i32(dst, cpu_env, offs);
break;
case ES_16:
tcg_gen_ld16u_i32(dst, cpu_env, offs);
break;
case ES_8 | MO_SIGN:
tcg_gen_ld8s_i32(dst, cpu_env, offs);
break;
case ES_16 | MO_SIGN:
tcg_gen_ld16s_i32(dst, cpu_env, offs);
break;
case ES_32:
case ES_32 | MO_SIGN:
tcg_gen_ld_i32(dst, cpu_env, offs);
break;
default:
g_assert_not_reached();
}
}

static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
TCGMemOp memop)
{
@@ -113,6 +140,25 @@ static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
}
}

static void write_vec_element_i32(TCGv_i32 src, int reg, uint8_t enr,
TCGMemOp memop)
{
const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

switch (memop) {
case ES_8:
tcg_gen_st8_i32(src, cpu_env, offs);
break;
case ES_16:
tcg_gen_st16_i32(src, cpu_env, offs);
break;
case ES_32:
tcg_gen_st_i32(src, cpu_env, offs);
break;
default:
g_assert_not_reached();
}
}

static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
uint8_t es)
@@ -136,12 +182,30 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
tcg_temp_free_i64(tmp);
}

#define gen_gvec_2(v1, v2, gen) \
tcg_gen_gvec_2(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
16, 16, gen)
#define gen_gvec_2s(v1, v2, c, gen) \
tcg_gen_gvec_2s(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
16, 16, c, gen)
#define gen_gvec_2i_ool(v1, v2, c, data, fn) \
tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
c, 16, 16, data, fn)
#define gen_gvec_2_ptr(v1, v2, ptr, data, fn) \
tcg_gen_gvec_2_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
ptr, 16, 16, data, fn)
#define gen_gvec_3(v1, v2, v3, gen) \
tcg_gen_gvec_3(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), 16, 16, gen)
#define gen_gvec_3_ool(v1, v2, v3, data, fn) \
tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), 16, 16, data, fn)
#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \
tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), ptr, 16, 16, data, fn)
#define gen_gvec_3i(v1, v2, v3, c, gen) \
tcg_gen_gvec_3i(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), c, 16, 16, gen)
#define gen_gvec_4(v1, v2, v3, v4, gen) \
tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
@@ -157,6 +221,85 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
16)
#define gen_gvec_dup64i(v1, c) \
tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_fn_2(fn, es, v1, v2) \
tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
16, 16)
#define gen_gvec_fn_2i(fn, es, v1, v2, c) \
tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
c, 16, 16)
#define gen_gvec_fn_2s(fn, es, v1, v2, s) \
tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
s, 16, 16)
#define gen_gvec_fn_3(fn, es, v1, v2, v3) \
tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), 16, 16)

/*
* Helper to carry out a 128 bit vector computation using 2 i64 values per
* vector.
*/
typedef void (*gen_gvec128_3_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
static void gen_gvec128_3_i64(gen_gvec128_3_i64_fn fn, uint8_t d, uint8_t a,
uint8_t b)
{
TCGv_i64 dh = tcg_temp_new_i64();
TCGv_i64 dl = tcg_temp_new_i64();
TCGv_i64 ah = tcg_temp_new_i64();
TCGv_i64 al = tcg_temp_new_i64();
TCGv_i64 bh = tcg_temp_new_i64();
TCGv_i64 bl = tcg_temp_new_i64();

read_vec_element_i64(ah, a, 0, ES_64);
read_vec_element_i64(al, a, 1, ES_64);
read_vec_element_i64(bh, b, 0, ES_64);
read_vec_element_i64(bl, b, 1, ES_64);
fn(dl, dh, al, ah, bl, bh);
write_vec_element_i64(dh, d, 0, ES_64);
write_vec_element_i64(dl, d, 1, ES_64);

tcg_temp_free_i64(dh);
tcg_temp_free_i64(dl);
tcg_temp_free_i64(ah);
tcg_temp_free_i64(al);
tcg_temp_free_i64(bh);
tcg_temp_free_i64(bl);
}

typedef void (*gen_gvec128_4_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh,
TCGv_i64 cl, TCGv_i64 ch);
static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a,
uint8_t b, uint8_t c)
{
TCGv_i64 dh = tcg_temp_new_i64();
TCGv_i64 dl = tcg_temp_new_i64();
TCGv_i64 ah = tcg_temp_new_i64();
TCGv_i64 al = tcg_temp_new_i64();
TCGv_i64 bh = tcg_temp_new_i64();
TCGv_i64 bl = tcg_temp_new_i64();
TCGv_i64 ch = tcg_temp_new_i64();
TCGv_i64 cl = tcg_temp_new_i64();

read_vec_element_i64(ah, a, 0, ES_64);
read_vec_element_i64(al, a, 1, ES_64);
read_vec_element_i64(bh, b, 0, ES_64);
read_vec_element_i64(bl, b, 1, ES_64);
read_vec_element_i64(ch, c, 0, ES_64);
read_vec_element_i64(cl, c, 1, ES_64);
fn(dl, dh, al, ah, bl, bh, cl, ch);
write_vec_element_i64(dh, d, 0, ES_64);
write_vec_element_i64(dl, d, 1, ES_64);

tcg_temp_free_i64(dh);
tcg_temp_free_i64(dl);
tcg_temp_free_i64(ah);
tcg_temp_free_i64(al);
tcg_temp_free_i64(bh);
tcg_temp_free_i64(bl);
tcg_temp_free_i64(ch);
tcg_temp_free_i64(cl);
}

static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c)
{
@@ -183,6 +326,17 @@ static void zero_vec(uint8_t reg)
tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
}

static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
uint64_t b)
{
TCGv_i64 bl = tcg_const_i64(b);
TCGv_i64 bh = tcg_const_i64(0);

tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
tcg_temp_free_i64(bl);
tcg_temp_free_i64(bh);
}

static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
{
const uint8_t es = s->insn->data;
@@ -933,3 +1087,1269 @@ static DisasJumpType op_vup(DisasContext *s, DisasOps *o)
tcg_temp_free_i64(tmp);
return DISAS_NEXT;
}

static DisasJumpType op_va(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);

if (es > ES_128) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
} else if (es == ES_128) {
gen_gvec128_3_i64(tcg_gen_add2_i64, get_field(s->fields, v1),
get_field(s->fields, v2), get_field(s->fields, v3));
return DISAS_NEXT;
}
gen_gvec_fn_3(add, es, get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3));
return DISAS_NEXT;
}

static void gen_acc(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, uint8_t es)
{
const uint8_t msb_bit_nr = NUM_VEC_ELEMENT_BITS(es) - 1;
TCGv_i64 msb_mask = tcg_const_i64(dup_const(es, 1ull << msb_bit_nr));
TCGv_i64 t1 = tcg_temp_new_i64();
TCGv_i64 t2 = tcg_temp_new_i64();
TCGv_i64 t3 = tcg_temp_new_i64();

/* Calculate the carry into the MSB, ignoring the old MSBs */
tcg_gen_andc_i64(t1, a, msb_mask);
tcg_gen_andc_i64(t2, b, msb_mask);
tcg_gen_add_i64(t1, t1, t2);
/* Calculate the MSB without any carry into it */
tcg_gen_xor_i64(t3, a, b);
/* Calculate the carry out of the MSB in the MSB bit position */
tcg_gen_and_i64(d, a, b);
tcg_gen_and_i64(t1, t1, t3);
tcg_gen_or_i64(d, d, t1);
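/* The MSB of each element of d now holds its carry out: (a & b) | ((a ^ b) & carry_in) */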
/* Isolate and shift the carry into position */
tcg_gen_and_i64(d, d, msb_mask);
tcg_gen_shri_i64(d, d, msb_bit_nr);

tcg_temp_free_i64(t1);
tcg_temp_free_i64(t2);
tcg_temp_free_i64(t3);
}

static void gen_acc8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
gen_acc(d, a, b, ES_8);
}

static void gen_acc16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
gen_acc(d, a, b, ES_16);
}

static void gen_acc_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
TCGv_i32 t = tcg_temp_new_i32();

tcg_gen_add_i32(t, a, b);
tcg_gen_setcond_i32(TCG_COND_LTU, d, t, b);
tcg_temp_free_i32(t);
}

static void gen_acc_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
TCGv_i64 t = tcg_temp_new_i64();

tcg_gen_add_i64(t, a, b);
tcg_gen_setcond_i64(TCG_COND_LTU, d, t, b);
tcg_temp_free_i64(t);
}

static void gen_acc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
{
TCGv_i64 th = tcg_temp_new_i64();
TCGv_i64 tl = tcg_temp_new_i64();
TCGv_i64 zero = tcg_const_i64(0);

tcg_gen_add2_i64(tl, th, al, zero, bl, zero);
tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
tcg_gen_mov_i64(dh, zero);

tcg_temp_free_i64(th);
tcg_temp_free_i64(tl);
tcg_temp_free_i64(zero);
}

static DisasJumpType op_vacc(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
static const GVecGen3 g[4] = {
{ .fni8 = gen_acc8_i64, },
{ .fni8 = gen_acc16_i64, },
{ .fni4 = gen_acc_i32, },
{ .fni8 = gen_acc_i64, },
};

if (es > ES_128) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
} else if (es == ES_128) {
gen_gvec128_3_i64(gen_acc2_i64, get_field(s->fields, v1),
get_field(s->fields, v2), get_field(s->fields, v3));
return DISAS_NEXT;
}
gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3), &g[es]);
return DISAS_NEXT;
}

static void gen_ac2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
{
TCGv_i64 tl = tcg_temp_new_i64();
TCGv_i64 th = tcg_const_i64(0);

/* extract the carry only */
tcg_gen_extract_i64(tl, cl, 0, 1);
tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
tcg_gen_add2_i64(dl, dh, dl, dh, tl, th);

tcg_temp_free_i64(tl);
tcg_temp_free_i64(th);
}

static DisasJumpType op_vac(DisasContext *s, DisasOps *o)
{
if (get_field(s->fields, m5) != ES_128) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

gen_gvec128_4_i64(gen_ac2_i64, get_field(s->fields, v1),
get_field(s->fields, v2), get_field(s->fields, v3),
get_field(s->fields, v4));
return DISAS_NEXT;
}

static void gen_accc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
{
TCGv_i64 tl = tcg_temp_new_i64();
TCGv_i64 th = tcg_temp_new_i64();
TCGv_i64 zero = tcg_const_i64(0);

tcg_gen_andi_i64(tl, cl, 1);
tcg_gen_add2_i64(tl, th, tl, zero, al, zero);
tcg_gen_add2_i64(tl, th, tl, th, bl, zero);
tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
tcg_gen_mov_i64(dh, zero);

tcg_temp_free_i64(tl);
tcg_temp_free_i64(th);
tcg_temp_free_i64(zero);
}

static DisasJumpType op_vaccc(DisasContext *s, DisasOps *o)
{
if (get_field(s->fields, m5) != ES_128) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

gen_gvec128_4_i64(gen_accc2_i64, get_field(s->fields, v1),
get_field(s->fields, v2), get_field(s->fields, v3),
get_field(s->fields, v4));
return DISAS_NEXT;
}

static DisasJumpType op_vn(DisasContext *s, DisasOps *o)
{
gen_gvec_fn_3(and, ES_8, get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3));
return DISAS_NEXT;
}

static DisasJumpType op_vnc(DisasContext *s, DisasOps *o)
{
gen_gvec_fn_3(andc, ES_8, get_field(s->fields, v1),
get_field(s->fields, v2), get_field(s->fields, v3));
return DISAS_NEXT;
}

static void gen_avg_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();

tcg_gen_ext_i32_i64(t0, a);
tcg_gen_ext_i32_i64(t1, b);
tcg_gen_add_i64(t0, t0, t1);
tcg_gen_addi_i64(t0, t0, 1);
tcg_gen_shri_i64(t0, t0, 1);
tcg_gen_extrl_i64_i32(d, t0);

tcg_temp_free(t0);
tcg_temp_free(t1);
}

static void gen_avg_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl)
{
TCGv_i64 dh = tcg_temp_new_i64();
TCGv_i64 ah = tcg_temp_new_i64();
TCGv_i64 bh = tcg_temp_new_i64();

/* extending the sign by one bit is sufficient */
tcg_gen_extract_i64(ah, al, 63, 1);
tcg_gen_extract_i64(bh, bl, 63, 1);
tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
gen_addi2_i64(dl, dh, dl, dh, 1);
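/* Shift the 65-bit result right by one: dl = (a + b + 1) >> 1 */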
tcg_gen_extract2_i64(dl, dl, dh, 1);

tcg_temp_free_i64(dh);
tcg_temp_free_i64(ah);
tcg_temp_free_i64(bh);
}

static DisasJumpType op_vavg(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
static const GVecGen3 g[4] = {
{ .fno = gen_helper_gvec_vavg8, },
{ .fno = gen_helper_gvec_vavg16, },
{ .fni4 = gen_avg_i32, },
{ .fni8 = gen_avg_i64, },
};

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}
gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3), &g[es]);
return DISAS_NEXT;
}

static void gen_avgl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();

tcg_gen_extu_i32_i64(t0, a);
tcg_gen_extu_i32_i64(t1, b);
tcg_gen_add_i64(t0, t0, t1);
tcg_gen_addi_i64(t0, t0, 1);
tcg_gen_shri_i64(t0, t0, 1);
tcg_gen_extrl_i64_i32(d, t0);

tcg_temp_free(t0);
tcg_temp_free(t1);
}

static void gen_avgl_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl)
{
TCGv_i64 dh = tcg_temp_new_i64();
TCGv_i64 zero = tcg_const_i64(0);

tcg_gen_add2_i64(dl, dh, al, zero, bl, zero);
gen_addi2_i64(dl, dh, dl, dh, 1);
tcg_gen_extract2_i64(dl, dl, dh, 1);

tcg_temp_free_i64(dh);
tcg_temp_free_i64(zero);
}

static DisasJumpType op_vavgl(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
static const GVecGen3 g[4] = {
{ .fno = gen_helper_gvec_vavgl8, },
{ .fno = gen_helper_gvec_vavgl16, },
{ .fni4 = gen_avgl_i32, },
{ .fni8 = gen_avgl_i64, },
};

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}
gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3), &g[es]);
return DISAS_NEXT;
}

static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o)
{
TCGv_i32 tmp = tcg_temp_new_i32();
TCGv_i32 sum = tcg_temp_new_i32();
int i;

read_vec_element_i32(sum, get_field(s->fields, v3), 1, ES_32);
for (i = 0; i < 4; i++) {
read_vec_element_i32(tmp, get_field(s->fields, v2), i, ES_32);
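/* Add with end-around carry: the high half of {sum:sum} + {tmp:tmp} is sum + tmp + carry */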
tcg_gen_add2_i32(tmp, sum, sum, sum, tmp, tmp);
}
zero_vec(get_field(s->fields, v1));
write_vec_element_i32(sum, get_field(s->fields, v1), 1, ES_32);

tcg_temp_free_i32(tmp);
tcg_temp_free_i32(sum);
return DISAS_NEXT;
}

static DisasJumpType op_vec(DisasContext *s, DisasOps *o)
{
uint8_t es = get_field(s->fields, m3);
const uint8_t enr = NUM_VEC_ELEMENTS(es) / 2 - 1;

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}
if (s->fields->op2 == 0xdb) {
es |= MO_SIGN;
}

o->in1 = tcg_temp_new_i64();
o->in2 = tcg_temp_new_i64();
read_vec_element_i64(o->in1, get_field(s->fields, v1), enr, es);
read_vec_element_i64(o->in2, get_field(s->fields, v2), enr, es);
return DISAS_NEXT;
}

static DisasJumpType op_vc(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
TCGCond cond = s->insn->data;

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

tcg_gen_gvec_cmp(cond, es,
vec_full_reg_offset(get_field(s->fields, v1)),
vec_full_reg_offset(get_field(s->fields, v2)),
vec_full_reg_offset(get_field(s->fields, v3)), 16, 16);
if (get_field(s->fields, m5) & 0x1) {
TCGv_i64 low = tcg_temp_new_i64();
TCGv_i64 high = tcg_temp_new_i64();

read_vec_element_i64(high, get_field(s->fields, v1), 0, ES_64);
read_vec_element_i64(low, get_field(s->fields, v1), 1, ES_64);
gen_op_update2_cc_i64(s, CC_OP_VC, low, high);

tcg_temp_free_i64(low);
tcg_temp_free_i64(high);
}
return DISAS_NEXT;
}

static void gen_clz_i32(TCGv_i32 d, TCGv_i32 a)
{
tcg_gen_clzi_i32(d, a, 32);
}

static void gen_clz_i64(TCGv_i64 d, TCGv_i64 a)
{
tcg_gen_clzi_i64(d, a, 64);
}

static DisasJumpType op_vclz(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m3);
static const GVecGen2 g[4] = {
{ .fno = gen_helper_gvec_vclz8, },
{ .fno = gen_helper_gvec_vclz16, },
{ .fni4 = gen_clz_i32, },
{ .fni8 = gen_clz_i64, },
};

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}
gen_gvec_2(get_field(s->fields, v1), get_field(s->fields, v2), &g[es]);
return DISAS_NEXT;
}

static void gen_ctz_i32(TCGv_i32 d, TCGv_i32 a)
{
tcg_gen_ctzi_i32(d, a, 32);
}

static void gen_ctz_i64(TCGv_i64 d, TCGv_i64 a)
{
tcg_gen_ctzi_i64(d, a, 64);
}

static DisasJumpType op_vctz(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m3);
static const GVecGen2 g[4] = {
{ .fno = gen_helper_gvec_vctz8, },
{ .fno = gen_helper_gvec_vctz16, },
{ .fni4 = gen_ctz_i32, },
{ .fni8 = gen_ctz_i64, },
};

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}
gen_gvec_2(get_field(s->fields, v1), get_field(s->fields, v2), &g[es]);
return DISAS_NEXT;
}

static DisasJumpType op_vx(DisasContext *s, DisasOps *o)
{
gen_gvec_fn_3(xor, ES_8, get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3));
return DISAS_NEXT;
}

static DisasJumpType op_vgfm(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
static const GVecGen3 g[4] = {
{ .fno = gen_helper_gvec_vgfm8, },
{ .fno = gen_helper_gvec_vgfm16, },
{ .fno = gen_helper_gvec_vgfm32, },
{ .fno = gen_helper_gvec_vgfm64, },
};

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}
gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3), &g[es]);
return DISAS_NEXT;
}

static DisasJumpType op_vgfma(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m5);
static const GVecGen4 g[4] = {
{ .fno = gen_helper_gvec_vgfma8, },
{ .fno = gen_helper_gvec_vgfma16, },
{ .fno = gen_helper_gvec_vgfma32, },
{ .fno = gen_helper_gvec_vgfma64, },
};

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}
gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3), get_field(s->fields, v4), &g[es]);
return DISAS_NEXT;
}

static DisasJumpType op_vlc(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m3);

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

gen_gvec_fn_2(neg, es, get_field(s->fields, v1), get_field(s->fields, v2));
return DISAS_NEXT;
}

static DisasJumpType op_vlp(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m3);

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

gen_gvec_fn_2(abs, es, get_field(s->fields, v1), get_field(s->fields, v2));
return DISAS_NEXT;
}

static DisasJumpType op_vmx(DisasContext *s, DisasOps *o)
{
const uint8_t v1 = get_field(s->fields, v1);
const uint8_t v2 = get_field(s->fields, v2);
const uint8_t v3 = get_field(s->fields, v3);
const uint8_t es = get_field(s->fields, m4);

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

switch (s->fields->op2) {
case 0xff:
gen_gvec_fn_3(smax, es, v1, v2, v3);
break;
case 0xfd:
gen_gvec_fn_3(umax, es, v1, v2, v3);
break;
case 0xfe:
gen_gvec_fn_3(smin, es, v1, v2, v3);
break;
case 0xfc:
gen_gvec_fn_3(umin, es, v1, v2, v3);
break;
default:
g_assert_not_reached();
}
return DISAS_NEXT;
}

static void gen_mal_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
{
TCGv_i32 t0 = tcg_temp_new_i32();

tcg_gen_mul_i32(t0, a, b);
tcg_gen_add_i32(d, t0, c);

tcg_temp_free_i32(t0);
}

static void gen_mah_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
{
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();
TCGv_i64 t2 = tcg_temp_new_i64();

tcg_gen_ext_i32_i64(t0, a);
tcg_gen_ext_i32_i64(t1, b);
tcg_gen_ext_i32_i64(t2, c);
tcg_gen_mul_i64(t0, t0, t1);
tcg_gen_add_i64(t0, t0, t2);
tcg_gen_extrh_i64_i32(d, t0);

tcg_temp_free(t0);
tcg_temp_free(t1);
tcg_temp_free(t2);
}

static void gen_malh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
{
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();
TCGv_i64 t2 = tcg_temp_new_i64();

tcg_gen_extu_i32_i64(t0, a);
tcg_gen_extu_i32_i64(t1, b);
tcg_gen_extu_i32_i64(t2, c);
tcg_gen_mul_i64(t0, t0, t1);
tcg_gen_add_i64(t0, t0, t2);
tcg_gen_extrh_i64_i32(d, t0);

tcg_temp_free(t0);
tcg_temp_free(t1);
tcg_temp_free(t2);
}

static DisasJumpType op_vma(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m5);
static const GVecGen4 g_vmal[3] = {
{ .fno = gen_helper_gvec_vmal8, },
{ .fno = gen_helper_gvec_vmal16, },
{ .fni4 = gen_mal_i32, },
};
static const GVecGen4 g_vmah[3] = {
{ .fno = gen_helper_gvec_vmah8, },
{ .fno = gen_helper_gvec_vmah16, },
{ .fni4 = gen_mah_i32, },
};
static const GVecGen4 g_vmalh[3] = {
{ .fno = gen_helper_gvec_vmalh8, },
{ .fno = gen_helper_gvec_vmalh16, },
{ .fni4 = gen_malh_i32, },
};
static const GVecGen4 g_vmae[3] = {
{ .fno = gen_helper_gvec_vmae8, },
{ .fno = gen_helper_gvec_vmae16, },
{ .fno = gen_helper_gvec_vmae32, },
};
static const GVecGen4 g_vmale[3] = {
{ .fno = gen_helper_gvec_vmale8, },
{ .fno = gen_helper_gvec_vmale16, },
{ .fno = gen_helper_gvec_vmale32, },
};
static const GVecGen4 g_vmao[3] = {
{ .fno = gen_helper_gvec_vmao8, },
{ .fno = gen_helper_gvec_vmao16, },
{ .fno = gen_helper_gvec_vmao32, },
};
static const GVecGen4 g_vmalo[3] = {
{ .fno = gen_helper_gvec_vmalo8, },
{ .fno = gen_helper_gvec_vmalo16, },
{ .fno = gen_helper_gvec_vmalo32, },
};
const GVecGen4 *fn;

if (es > ES_32) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

switch (s->fields->op2) {
case 0xaa:
fn = &g_vmal[es];
break;
case 0xab:
fn = &g_vmah[es];
break;
case 0xa9:
fn = &g_vmalh[es];
break;
case 0xae:
fn = &g_vmae[es];
break;
case 0xac:
fn = &g_vmale[es];
break;
case 0xaf:
fn = &g_vmao[es];
break;
case 0xad:
fn = &g_vmalo[es];
break;
default:
g_assert_not_reached();
}

gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3), get_field(s->fields, v4), fn);
return DISAS_NEXT;
}

static void gen_mh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
TCGv_i32 t = tcg_temp_new_i32();

tcg_gen_muls2_i32(t, d, a, b);
tcg_temp_free_i32(t);
}

static void gen_mlh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
TCGv_i32 t = tcg_temp_new_i32();

tcg_gen_mulu2_i32(t, d, a, b);
tcg_temp_free_i32(t);
}

static DisasJumpType op_vm(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
static const GVecGen3 g_vmh[3] = {
{ .fno = gen_helper_gvec_vmh8, },
{ .fno = gen_helper_gvec_vmh16, },
{ .fni4 = gen_mh_i32, },
};
static const GVecGen3 g_vmlh[3] = {
{ .fno = gen_helper_gvec_vmlh8, },
{ .fno = gen_helper_gvec_vmlh16, },
{ .fni4 = gen_mlh_i32, },
};
static const GVecGen3 g_vme[3] = {
{ .fno = gen_helper_gvec_vme8, },
{ .fno = gen_helper_gvec_vme16, },
{ .fno = gen_helper_gvec_vme32, },
};
static const GVecGen3 g_vmle[3] = {
{ .fno = gen_helper_gvec_vmle8, },
{ .fno = gen_helper_gvec_vmle16, },
{ .fno = gen_helper_gvec_vmle32, },
};
static const GVecGen3 g_vmo[3] = {
{ .fno = gen_helper_gvec_vmo8, },
{ .fno = gen_helper_gvec_vmo16, },
{ .fno = gen_helper_gvec_vmo32, },
};
static const GVecGen3 g_vmlo[3] = {
{ .fno = gen_helper_gvec_vmlo8, },
{ .fno = gen_helper_gvec_vmlo16, },
{ .fno = gen_helper_gvec_vmlo32, },
};
const GVecGen3 *fn;

if (es > ES_32) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

switch (s->fields->op2) {
case 0xa2:
gen_gvec_fn_3(mul, es, get_field(s->fields, v1),
get_field(s->fields, v2), get_field(s->fields, v3));
return DISAS_NEXT;
case 0xa3:
fn = &g_vmh[es];
break;
case 0xa1:
fn = &g_vmlh[es];
break;
case 0xa6:
fn = &g_vme[es];
break;
case 0xa4:
fn = &g_vmle[es];
break;
case 0xa7:
fn = &g_vmo[es];
break;
case 0xa5:
fn = &g_vmlo[es];
break;
default:
g_assert_not_reached();
}

gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3), fn);
return DISAS_NEXT;
}

static DisasJumpType op_vnn(DisasContext *s, DisasOps *o)
{
gen_gvec_fn_3(nand, ES_8, get_field(s->fields, v1),
get_field(s->fields, v2), get_field(s->fields, v3));
return DISAS_NEXT;
}

static DisasJumpType op_vno(DisasContext *s, DisasOps *o)
{
gen_gvec_fn_3(nor, ES_8, get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3));
return DISAS_NEXT;
}

static DisasJumpType op_vnx(DisasContext *s, DisasOps *o)
{
gen_gvec_fn_3(eqv, ES_8, get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3));
return DISAS_NEXT;
}

static DisasJumpType op_vo(DisasContext *s, DisasOps *o)
{
gen_gvec_fn_3(or, ES_8, get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3));
return DISAS_NEXT;
}

static DisasJumpType op_voc(DisasContext *s, DisasOps *o)
{
gen_gvec_fn_3(orc, ES_8, get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3));
return DISAS_NEXT;
}

static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m3);
static const GVecGen2 g[4] = {
{ .fno = gen_helper_gvec_vpopct8, },
{ .fno = gen_helper_gvec_vpopct16, },
{ .fni4 = tcg_gen_ctpop_i32, },
{ .fni8 = tcg_gen_ctpop_i64, },
};

if (es > ES_64 || (es != ES_8 && !s390_has_feat(S390_FEAT_VECTOR_ENH))) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

gen_gvec_2(get_field(s->fields, v1), get_field(s->fields, v2), &g[es]);
return DISAS_NEXT;
}

static void gen_rll_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
TCGv_i32 t0 = tcg_temp_new_i32();

tcg_gen_andi_i32(t0, b, 31);
tcg_gen_rotl_i32(d, a, t0);
tcg_temp_free_i32(t0);
}

static void gen_rll_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
TCGv_i64 t0 = tcg_temp_new_i64();

tcg_gen_andi_i64(t0, b, 63);
tcg_gen_rotl_i64(d, a, t0);
tcg_temp_free_i64(t0);
}

static DisasJumpType op_verllv(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
static const GVecGen3 g[4] = {
{ .fno = gen_helper_gvec_verllv8, },
{ .fno = gen_helper_gvec_verllv16, },
{ .fni4 = gen_rll_i32, },
{ .fni8 = gen_rll_i64, },
};

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3), &g[es]);
return DISAS_NEXT;
}

static DisasJumpType op_verll(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
static const GVecGen2s g[4] = {
{ .fno = gen_helper_gvec_verll8, },
{ .fno = gen_helper_gvec_verll16, },
{ .fni4 = gen_rll_i32, },
{ .fni8 = gen_rll_i64, },
};

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}
gen_gvec_2s(get_field(s->fields, v1), get_field(s->fields, v3), o->addr1,
&g[es]);
return DISAS_NEXT;
}

static void gen_rim_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c)
{
TCGv_i32 t = tcg_temp_new_i32();

tcg_gen_rotli_i32(t, a, c & 31);
tcg_gen_and_i32(t, t, b);
tcg_gen_andc_i32(d, d, b);
tcg_gen_or_i32(d, d, t);

tcg_temp_free_i32(t);
}

static void gen_rim_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, int64_t c)
{
TCGv_i64 t = tcg_temp_new_i64();

tcg_gen_rotli_i64(t, a, c & 63);
tcg_gen_and_i64(t, t, b);
tcg_gen_andc_i64(d, d, b);
tcg_gen_or_i64(d, d, t);

tcg_temp_free_i64(t);
}

static DisasJumpType op_verim(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m5);
const uint8_t i4 = get_field(s->fields, i4) &
(NUM_VEC_ELEMENT_BITS(es) - 1);
static const GVecGen3i g[4] = {
{ .fno = gen_helper_gvec_verim8, },
{ .fno = gen_helper_gvec_verim16, },
{ .fni4 = gen_rim_i32,
.load_dest = true, },
{ .fni8 = gen_rim_i64,
.load_dest = true, },
};

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

gen_gvec_3i(get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3), i4, &g[es]);
return DISAS_NEXT;
}

static DisasJumpType op_vesv(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
const uint8_t v1 = get_field(s->fields, v1);
const uint8_t v2 = get_field(s->fields, v2);
const uint8_t v3 = get_field(s->fields, v3);

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

switch (s->fields->op2) {
case 0x70:
gen_gvec_fn_3(shlv, es, v1, v2, v3);
break;
case 0x7a:
gen_gvec_fn_3(sarv, es, v1, v2, v3);
break;
case 0x78:
gen_gvec_fn_3(shrv, es, v1, v2, v3);
break;
default:
g_assert_not_reached();
}
return DISAS_NEXT;
}

static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
const uint8_t d2 = get_field(s->fields, d2) &
(NUM_VEC_ELEMENT_BITS(es) - 1);
const uint8_t v1 = get_field(s->fields, v1);
const uint8_t v3 = get_field(s->fields, v3);
TCGv_i32 shift;

if (es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

if (likely(!get_field(s->fields, b2))) {
switch (s->fields->op2) {
case 0x30:
gen_gvec_fn_2i(shli, es, v1, v3, d2);
break;
case 0x3a:
gen_gvec_fn_2i(sari, es, v1, v3, d2);
break;
case 0x38:
gen_gvec_fn_2i(shri, es, v1, v3, d2);
break;
default:
g_assert_not_reached();
}
} else {
shift = tcg_temp_new_i32();
tcg_gen_extrl_i64_i32(shift, o->addr1);
tcg_gen_andi_i32(shift, shift, NUM_VEC_ELEMENT_BITS(es) - 1);
switch (s->fields->op2) {
case 0x30:
gen_gvec_fn_2s(shls, es, v1, v3, shift);
break;
case 0x3a:
gen_gvec_fn_2s(sars, es, v1, v3, shift);
break;
case 0x38:
gen_gvec_fn_2s(shrs, es, v1, v3, shift);
break;
default:
g_assert_not_reached();
}
tcg_temp_free_i32(shift);
}
return DISAS_NEXT;
}

static DisasJumpType op_vsl(DisasContext *s, DisasOps *o)
{
TCGv_i64 shift = tcg_temp_new_i64();

read_vec_element_i64(shift, get_field(s->fields, v3), 7, ES_8);
if (s->fields->op2 == 0x74) {
tcg_gen_andi_i64(shift, shift, 0x7);
} else {
tcg_gen_andi_i64(shift, shift, 0x78);
}

gen_gvec_2i_ool(get_field(s->fields, v1), get_field(s->fields, v2),
shift, 0, gen_helper_gvec_vsl);
tcg_temp_free_i64(shift);
return DISAS_NEXT;
}

static DisasJumpType op_vsldb(DisasContext *s, DisasOps *o)
{
const uint8_t i4 = get_field(s->fields, i4) & 0xf;
const int left_shift = (i4 & 7) * 8;
const int right_shift = 64 - left_shift;
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();
TCGv_i64 t2 = tcg_temp_new_i64();

if ((i4 & 8) == 0) {
read_vec_element_i64(t0, get_field(s->fields, v2), 0, ES_64);
read_vec_element_i64(t1, get_field(s->fields, v2), 1, ES_64);
read_vec_element_i64(t2, get_field(s->fields, v3), 0, ES_64);
} else {
read_vec_element_i64(t0, get_field(s->fields, v2), 1, ES_64);
read_vec_element_i64(t1, get_field(s->fields, v3), 0, ES_64);
read_vec_element_i64(t2, get_field(s->fields, v3), 1, ES_64);
}
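/* Concatenate adjacent doublewords and shift: t0 = (t0 << left_shift) | (t1 >> right_shift), same for t1/t2 */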
tcg_gen_extract2_i64(t0, t1, t0, right_shift);
tcg_gen_extract2_i64(t1, t2, t1, right_shift);
write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);

tcg_temp_free(t0);
tcg_temp_free(t1);
tcg_temp_free(t2);
return DISAS_NEXT;
}

static DisasJumpType op_vsra(DisasContext *s, DisasOps *o)
{
TCGv_i64 shift = tcg_temp_new_i64();

read_vec_element_i64(shift, get_field(s->fields, v3), 7, ES_8);
if (s->fields->op2 == 0x7e) {
tcg_gen_andi_i64(shift, shift, 0x7);
} else {
tcg_gen_andi_i64(shift, shift, 0x78);
}

gen_gvec_2i_ool(get_field(s->fields, v1), get_field(s->fields, v2),
shift, 0, gen_helper_gvec_vsra);
tcg_temp_free_i64(shift);
return DISAS_NEXT;
}

static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o)
{
TCGv_i64 shift = tcg_temp_new_i64();

read_vec_element_i64(shift, get_field(s->fields, v3), 7, ES_8);
if (s->fields->op2 == 0x7c) {
tcg_gen_andi_i64(shift, shift, 0x7);
} else {
tcg_gen_andi_i64(shift, shift, 0x78);
}

gen_gvec_2i_ool(get_field(s->fields, v1), get_field(s->fields, v2),
shift, 0, gen_helper_gvec_vsrl);
tcg_temp_free_i64(shift);
return DISAS_NEXT;
}

static DisasJumpType op_vs(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);

if (es > ES_128) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
} else if (es == ES_128) {
gen_gvec128_3_i64(tcg_gen_sub2_i64, get_field(s->fields, v1),
get_field(s->fields, v2), get_field(s->fields, v3));
return DISAS_NEXT;
}
gen_gvec_fn_3(sub, es, get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3));
return DISAS_NEXT;
}

static void gen_scbi_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
tcg_gen_setcond_i32(TCG_COND_LTU, d, a, b);
}

static void gen_scbi_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
tcg_gen_setcond_i64(TCG_COND_LTU, d, a, b);
}

static void gen_scbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
{
TCGv_i64 th = tcg_temp_new_i64();
TCGv_i64 tl = tcg_temp_new_i64();
TCGv_i64 zero = tcg_const_i64(0);

tcg_gen_sub2_i64(tl, th, al, zero, bl, zero);
tcg_gen_andi_i64(th, th, 1);
tcg_gen_sub2_i64(tl, th, ah, zero, th, zero);
tcg_gen_sub2_i64(tl, th, tl, th, bh, zero);
tcg_gen_andi_i64(dl, th, 1);
tcg_gen_mov_i64(dh, zero);

tcg_temp_free_i64(th);
tcg_temp_free_i64(tl);
tcg_temp_free_i64(zero);
}

static DisasJumpType op_vscbi(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
static const GVecGen3 g[4] = {
{ .fno = gen_helper_gvec_vscbi8, },
{ .fno = gen_helper_gvec_vscbi16, },
{ .fni4 = gen_scbi_i32, },
{ .fni8 = gen_scbi_i64, },
};

if (es > ES_128) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
} else if (es == ES_128) {
gen_gvec128_3_i64(gen_scbi2_i64, get_field(s->fields, v1),
get_field(s->fields, v2), get_field(s->fields, v3));
return DISAS_NEXT;
}
gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3), &g[es]);
return DISAS_NEXT;
}

static void gen_sbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
{
TCGv_i64 tl = tcg_temp_new_i64();
TCGv_i64 zero = tcg_const_i64(0);

tcg_gen_andi_i64(tl, cl, 1);
tcg_gen_sub2_i64(dl, dh, al, ah, bl, bh);
tcg_gen_sub2_i64(dl, dh, dl, dh, tl, zero);
tcg_temp_free_i64(tl);
tcg_temp_free_i64(zero);
}

static DisasJumpType op_vsbi(DisasContext *s, DisasOps *o)
{
if (get_field(s->fields, m5) != ES_128) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

gen_gvec128_4_i64(gen_sbi2_i64, get_field(s->fields, v1),
get_field(s->fields, v2), get_field(s->fields, v3),
get_field(s->fields, v4));
return DISAS_NEXT;
}

static void gen_sbcbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
{
TCGv_i64 th = tcg_temp_new_i64();
TCGv_i64 tl = tcg_temp_new_i64();
TCGv_i64 zero = tcg_const_i64(0);

tcg_gen_andi_i64(tl, cl, 1);
tcg_gen_sub2_i64(tl, th, al, zero, tl, zero);
tcg_gen_sub2_i64(tl, th, tl, th, bl, zero);
tcg_gen_andi_i64(th, th, 1);
tcg_gen_sub2_i64(tl, th, ah, zero, th, zero);
tcg_gen_sub2_i64(tl, th, tl, th, bh, zero);
tcg_gen_andi_i64(dl, th, 1);
tcg_gen_mov_i64(dh, zero);

tcg_temp_free_i64(tl);
tcg_temp_free_i64(th);
tcg_temp_free_i64(zero);
}

static DisasJumpType op_vsbcbi(DisasContext *s, DisasOps *o)
{
if (get_field(s->fields, m5) != ES_128) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

gen_gvec128_4_i64(gen_sbcbi2_i64, get_field(s->fields, v1),
get_field(s->fields, v2), get_field(s->fields, v3),
get_field(s->fields, v4));
return DISAS_NEXT;
}

static DisasJumpType op_vsumg(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
TCGv_i64 sum, tmp;
uint8_t dst_idx;

if (es == ES_8 || es > ES_32) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

sum = tcg_temp_new_i64();
tmp = tcg_temp_new_i64();
for (dst_idx = 0; dst_idx < 2; dst_idx++) {
uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 2;
const uint8_t max_idx = idx + NUM_VEC_ELEMENTS(es) / 2 - 1;

read_vec_element_i64(sum, get_field(s->fields, v3), max_idx, es);
for (; idx <= max_idx; idx++) {
read_vec_element_i64(tmp, get_field(s->fields, v2), idx, es);
tcg_gen_add_i64(sum, sum, tmp);
}
write_vec_element_i64(sum, get_field(s->fields, v1), dst_idx, ES_64);
}
tcg_temp_free_i64(sum);
tcg_temp_free_i64(tmp);
return DISAS_NEXT;
}

static DisasJumpType op_vsumq(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
const uint8_t max_idx = NUM_VEC_ELEMENTS(es) - 1;
TCGv_i64 sumh, suml, zero, tmpl;
uint8_t idx;

if (es < ES_32 || es > ES_64) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

sumh = tcg_const_i64(0);
suml = tcg_temp_new_i64();
zero = tcg_const_i64(0);
tmpl = tcg_temp_new_i64();

read_vec_element_i64(suml, get_field(s->fields, v3), max_idx, es);
for (idx = 0; idx <= max_idx; idx++) {
read_vec_element_i64(tmpl, get_field(s->fields, v2), idx, es);
tcg_gen_add2_i64(suml, sumh, suml, sumh, tmpl, zero);
}
write_vec_element_i64(sumh, get_field(s->fields, v1), 0, ES_64);
write_vec_element_i64(suml, get_field(s->fields, v1), 1, ES_64);

tcg_temp_free_i64(sumh);
tcg_temp_free_i64(suml);
tcg_temp_free_i64(zero);
tcg_temp_free_i64(tmpl);
return DISAS_NEXT;
}

static DisasJumpType op_vsum(DisasContext *s, DisasOps *o)
{
const uint8_t es = get_field(s->fields, m4);
TCGv_i32 sum, tmp;
uint8_t dst_idx;

if (es > ES_16) {
gen_program_exception(s, PGM_SPECIFICATION);
return DISAS_NORETURN;
}

sum = tcg_temp_new_i32();
tmp = tcg_temp_new_i32();
for (dst_idx = 0; dst_idx < 4; dst_idx++) {
uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 4;
const uint8_t max_idx = idx + NUM_VEC_ELEMENTS(es) / 4 - 1;

read_vec_element_i32(sum, get_field(s->fields, v3), max_idx, es);
for (; idx <= max_idx; idx++) {
read_vec_element_i32(tmp, get_field(s->fields, v2), idx, es);
tcg_gen_add_i32(sum, sum, tmp);
}
write_vec_element_i32(sum, get_field(s->fields, v1), dst_idx, ES_32);
}
tcg_temp_free_i32(sum);
tcg_temp_free_i32(tmp);
return DISAS_NEXT;
}

static DisasJumpType op_vtm(DisasContext *s, DisasOps *o)
{
gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
cpu_env, 0, gen_helper_gvec_vtm);
set_cc_static(s);
return DISAS_NEXT;
}

target/s390x/vec_int_helper.c (+616/-0)

@@ -0,0 +1,616 @@
/*
* QEMU TCG support -- s390x vector integer instruction support
*
* Copyright (C) 2019 Red Hat Inc
*
* Authors:
* David Hildenbrand <david@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "vec.h"
#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"

static bool s390_vec_is_zero(const S390Vector *v)
{
return !v->doubleword[0] && !v->doubleword[1];
}

static void s390_vec_xor(S390Vector *res, const S390Vector *a,
const S390Vector *b)
{
res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
}

static void s390_vec_and(S390Vector *res, const S390Vector *a,
const S390Vector *b)
{
res->doubleword[0] = a->doubleword[0] & b->doubleword[0];
res->doubleword[1] = a->doubleword[1] & b->doubleword[1];
}

static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
{
return a->doubleword[0] == b->doubleword[0] &&
a->doubleword[1] == b->doubleword[1];
}

static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
{
uint64_t tmp;

g_assert(count < 128);
if (count == 0) {
d->doubleword[0] = a->doubleword[0];
d->doubleword[1] = a->doubleword[1];
} else if (count == 64) {
d->doubleword[0] = a->doubleword[1];
d->doubleword[1] = 0;
} else if (count < 64) {
tmp = extract64(a->doubleword[1], 64 - count, count);
d->doubleword[1] = a->doubleword[1] << count;
d->doubleword[0] = (a->doubleword[0] << count) | tmp;
} else {
d->doubleword[0] = a->doubleword[1] << (count - 64);
d->doubleword[1] = 0;
}
}

static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
{
uint64_t tmp;

if (count == 0) {
d->doubleword[0] = a->doubleword[0];
d->doubleword[1] = a->doubleword[1];
} else if (count == 64) {
d->doubleword[1] = a->doubleword[0];
d->doubleword[0] = 0;
} else if (count < 64) {
tmp = a->doubleword[1] >> count;
d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
} else {
d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
d->doubleword[0] = 0;
}
}

static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
{
uint64_t tmp;

g_assert(count < 128);
if (count == 0) {
d->doubleword[0] = a->doubleword[0];
d->doubleword[1] = a->doubleword[1];
} else if (count == 64) {
d->doubleword[1] = a->doubleword[0];
d->doubleword[0] = 0;
} else if (count < 64) {
tmp = a->doubleword[1] >> count;
d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
d->doubleword[0] = a->doubleword[0] >> count;
} else {
d->doubleword[1] = a->doubleword[0] >> (count - 64);
d->doubleword[0] = 0;
}
}
#define DEF_VAVG(BITS) \
void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \
uint32_t desc) \
{ \
int i; \
\
for (i = 0; i < (128 / BITS); i++) { \
const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
\
s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \
} \
}
DEF_VAVG(8)
DEF_VAVG(16)

#define DEF_VAVGL(BITS) \
void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \
uint32_t desc) \
{ \
int i; \
\
for (i = 0; i < (128 / BITS); i++) { \
const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
\
s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \
} \
}
DEF_VAVGL(8)
DEF_VAVGL(16)

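/*
 * VECTOR COUNT LEADING ZEROS: elements are widened to 32 bits, so the
 * clz32() result is rebased with (BITS - 32); clz32(0) returns 32, which
 * yields BITS for a zero element. E.g. for BITS = 8 and a = 0x10,
 * clz32(0x10) = 27 and 27 - 32 + 8 = 3.
 */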
#define DEF_VCLZ(BITS) \
void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \
{ \
int i; \
\
for (i = 0; i < (128 / BITS); i++) { \
const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
\
s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \
} \
}
DEF_VCLZ(8)
DEF_VCLZ(16)

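/* VECTOR COUNT TRAILING ZEROS: a zero element has all BITS bits counted. */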
#define DEF_VCTZ(BITS) \
void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \
{ \
int i; \
\
for (i = 0; i < (128 / BITS); i++) { \
const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
\
s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \
} \
}
DEF_VCTZ(8)
DEF_VCTZ(16)

/* Like binary multiplication, but using XOR instead of addition: a
 * carry-less multiplication of polynomials over GF(2). */
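/* Example: 0x03 * 0x05 = (x + 1)(x^2 + 1) = x^3 + x^2 + x + 1 = 0x0f. */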
#define DEF_GALOIS_MULTIPLY(BITS, TBITS) \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \
uint##TBITS##_t b) \
{ \
uint##TBITS##_t res = 0; \
\
while (b) { \
if (b & 0x1) { \
res = res ^ a; \
} \
a = a << 1; \
b = b >> 1; \
} \
return res; \
}
DEF_GALOIS_MULTIPLY(8, 16)
DEF_GALOIS_MULTIPLY(16, 32)
DEF_GALOIS_MULTIPLY(32, 64)

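/*
 * The product of two 64-bit polynomials can be up to 127 bits wide, so the
 * 64-bit variant runs the shift-and-XOR loop on full 128-bit S390Vector
 * values.
 */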
static S390Vector galois_multiply64(uint64_t a, uint64_t b)
{
S390Vector res = {};
S390Vector va = {
.doubleword[1] = a,
};
S390Vector vb = {
.doubleword[1] = b,
};

while (!s390_vec_is_zero(&vb)) {
if (vb.doubleword[1] & 0x1) {
s390_vec_xor(&res, &res, &va);
}
s390_vec_shl(&va, &va, 1);
s390_vec_shr(&vb, &vb, 1);
}
return res;
}

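/*
 * VECTOR GALOIS FIELD MULTIPLY SUM: each even/odd pair of elements of v2 and
 * v3 is carry-less multiplied, and the two double-width products are XORed
 * into one element of v1.
 */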
#define DEF_VGFM(BITS, TBITS) \
void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \
uint32_t desc) \
{ \
int i; \
\
for (i = 0; i < (128 / TBITS); i++) { \
uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
uint##TBITS##_t d = galois_multiply##BITS(a, b); \
\
a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
d = d ^ galois_multiply##BITS(a, b); \
s390_vec_write_element##TBITS(v1, i, d); \
} \
}
DEF_VGFM(8, 16)
DEF_VGFM(16, 32)
DEF_VGFM(32, 64)

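/*
 * The 64-bit element size is special-cased because each product is up to
 * 127 bits wide; the two 128-bit products are XORed into the destination.
 */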
void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
uint32_t desc)
{
S390Vector tmp1, tmp2;
uint64_t a, b;

a = s390_vec_read_element64(v2, 0);
b = s390_vec_read_element64(v3, 0);
tmp1 = galois_multiply64(a, b);
a = s390_vec_read_element64(v2, 1);
b = s390_vec_read_element64(v3, 1);
tmp2 = galois_multiply64(a, b);
s390_vec_xor(v1, &tmp1, &tmp2);
}

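/*
 * VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE: as above, but the
 * matching element of v4 is additionally XORed into each result.
 */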
#define DEF_VGFMA(BITS, TBITS) \
void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \
const void *v4, uint32_t desc) \
{ \
int i; \
\
for (i = 0; i < (128 / TBITS); i++) { \
uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
uint##TBITS##_t d = galois_multiply##BITS(a, b); \
\
a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
d = d ^ galois_multiply##BITS(a, b); \
d = d ^ s390_vec_read_element##TBITS(v4, i); \
s390_vec_write_element##TBITS(v1, i, d); \
} \
}
DEF_VGFMA(8, 16)
DEF_VGFMA(16, 32)
DEF_VGFMA(32, 64)

void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
const void *v4, uint32_t desc)
{
S390Vector tmp1, tmp2;
uint64_t a, b;

a = s390_vec_read_element64(v2, 0);
b = s390_vec_read_element64(v3, 0);
tmp1 = galois_multiply64(a, b);
a = s390_vec_read_element64(v2, 1);
b = s390_vec_read_element64(v3, 1);
tmp2 = galois_multiply64(a, b);
s390_vec_xor(&tmp1, &tmp1, &tmp2);
s390_vec_xor(v1, &tmp1, v4);
}

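/*
 * VECTOR MULTIPLY AND ADD LOW: only the low BITS bits of a * b + c are
 * stored, so signed and unsigned elements produce the same result.
 */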
#define DEF_VMAL(BITS) \
void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \
const void *v4, uint32_t desc) \
{ \
int i; \
\
for (i = 0; i < (128 / BITS); i++) { \
const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \
\
s390_vec_write_element##BITS(v1, i, (uint32_t)a * b + c); \
} \
}
DEF_VMAL(8)
DEF_VMAL(16)

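/*
 * VECTOR MULTIPLY AND ADD HIGH: the high BITS bits of the signed a * b + c.
 * 32-bit arithmetic is wide enough for the 8- and 16-bit element sizes
 * handled here; e.g. for 16-bit elements, 0x0100 * 0x0100 + 0 = 0x00010000,
 * whose high half is 0x0001. The unsigned variant (VMALH) follows.
 */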
#define DEF_VMAH(BITS) \
void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \
const void *v4, uint32_t desc) \
{ \
int i; \
\
for (i = 0; i < (128 / BITS); i++) { \
const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \
const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \
const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \
\
s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \
} \
}
DEF_VMAH(8)
DEF_VMAH(16)

#define DEF_VMALH(BITS) \
void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \
const void *v4, uint32_t desc) \
{ \
int i; \
\
for (i = 0; i < (128 / BITS); i++) { \
const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \
\
s390_vec_write_element##BITS(v1, i, ((uint32_t)a * b + c) >> BITS); \
} \
}
DEF_VMALH(8)
DEF_VMALH(16)

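/*
 * VECTOR MULTIPLY AND ADD EVEN/ODD: the even- (j = 0, 2, ...) or odd-indexed
 * (j = 1, 3, ...) BITS-wide elements of v2 and v3 are widened and multiplied,
 * and the corresponding double-width element of v4 is added. The LOGICAL
 * variants treat the elements as unsigned.
 */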
#define DEF_VMAE(BITS, TBITS) \
void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \
const void *v4, uint32_t desc) \
{ \
int i, j; \
\
for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
\
s390_vec_write_element##TBITS(v1, i, a * b + c); \
} \
}
DEF_VMAE(8, 16)
DEF_VMAE(16, 32)
DEF_VMAE(32, 64)

#define DEF_VMALE(BITS, TBITS) \
void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \
const void *v4, uint32_t desc) \
{ \
int i, j; \
\
for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
\
s390_vec_write_element##TBITS(v1, i, a * b + c); \
} \
}
DEF_VMALE(8, 16)
DEF_VMALE(16, 32)
DEF_VMALE(32, 64)

#define DEF_VMAO(BITS, TBITS) \
void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \
const void *v4, uint32_t desc) \
{ \
int i, j; \
\
for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
\
s390_vec_write_element##TBITS(v1, i, a * b + c); \
} \
}
DEF_VMAO(8, 16)
DEF_VMAO(16, 32)
DEF_VMAO(32, 64)

#define DEF_VMALO(BITS, TBITS) \
void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \
const void *v4, uint32_t desc) \
{ \