Browse Source

Merge tag 's390x-tcg-2019-05-17-2' into s390-next-staging

Implement all Vector Integer Instructions introduced with the
"Vector Facility" for s390x TCG.

# gpg: Signature made Fri 17 May 2019 01:37:40 PM CEST
# gpg:                using RSA key 4DDE10F700FF835A
# gpg: Good signature from "David Hildenbrand <david@redhat.com>" [full]
# gpg:                 aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full]

* tag 's390x-tcg-2019-05-17-2': (40 commits)
  s390x/tcg: Implement VECTOR TEST UNDER MASK
  s390x/tcg: Implement VECTOR SUM ACROSS WORD
  s390x/tcg: Implement VECTOR SUM ACROSS QUADWORD
  s390x/tcg: Implement VECTOR SUM ACROSS DOUBLEWORD
  s390x/tcg: Implement VECTOR SUBTRACT WITH BORROW COMPUTE BORROW INDICATION
  s390x/tcg: Implement VECTOR SUBTRACT WITH BORROW INDICATION
  s390x/tcg: Implement VECTOR SUBTRACT COMPUTE BORROW INDICATION
  s390x/tcg: Implement VECTOR SUBTRACT
  s390x/tcg: Implement VECTOR SHIFT RIGHT LOGICAL *
  s390x/tcg: Implement VECTOR SHIFT RIGHT ARITHMETIC
  s390x/tcg: Implement VECTOR SHIFT LEFT DOUBLE BY BYTE
  s390x/tcg: Implement VECTOR SHIFT LEFT (BY BYTE)
  s390x/tcg: Implement VECTOR ELEMENT SHIFT
  s390x/tcg: Implement VECTOR ELEMENT ROTATE AND INSERT UNDER MASK
  s390x/tcg: Implement VECTOR ELEMENT ROTATE LEFT LOGICAL
  s390x/tcg: Implement VECTOR POPULATION COUNT
  s390x/tcg: Implement VECTOR OR WITH COMPLEMENT
  s390x/tcg: Implement VECTOR OR
  s390x/tcg: Implement VECTOR NOT EXCLUSIVE OR
  s390x/tcg: Implement VECTOR NOR
  ...

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
tags/v4.1.0-rc0
Cornelia Huck 2 months ago
parent
commit
e85decf2f1

+ 1
- 1
target/s390x/Makefile.objs View File

@@ -1,7 +1,7 @@
1 1
 obj-y += cpu.o cpu_models.o cpu_features.o gdbstub.o interrupt.o helper.o
2 2
 obj-$(CONFIG_TCG) += translate.o cc_helper.o excp_helper.o fpu_helper.o
3 3
 obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o crypto_helper.o
4
-obj-$(CONFIG_TCG) += vec_helper.o
4
+obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o
5 5
 obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o mmu_helper.o diag.o
6 6
 obj-$(CONFIG_SOFTMMU) += sigp.o
7 7
 obj-$(CONFIG_KVM) += kvm.o

+ 17
- 0
target/s390x/cc_helper.c View File

@@ -402,6 +402,20 @@ static uint32_t cc_calc_lcbb(uint64_t dst)
402 402
     return dst == 16 ? 0 : 3;
403 403
 }
404 404
 
405
+static uint32_t cc_calc_vc(uint64_t low, uint64_t high)
406
+{
407
+    if (high == -1ull && low == -1ull) {
408
+        /* all elements match */
409
+        return 0;
410
+    } else if (high == 0 && low == 0) {
411
+        /* no elements match */
412
+        return 3;
413
+    } else {
414
+        /* some elements but not all match */
415
+        return 1;
416
+    }
417
+}
418
+
405 419
 static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
406 420
                                   uint64_t src, uint64_t dst, uint64_t vr)
407 421
 {
@@ -514,6 +528,9 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
514 528
     case CC_OP_LCBB:
515 529
         r = cc_calc_lcbb(dst);
516 530
         break;
531
+    case CC_OP_VC:
532
+        r = cc_calc_vc(src, dst);
533
+        break;
517 534
 
518 535
     case CC_OP_NZ_F32:
519 536
         r = set_cc_nz_f32(dst);

+ 1
- 0
target/s390x/helper.c View File

@@ -418,6 +418,7 @@ const char *cc_name(enum cc_op cc_op)
418 418
         [CC_OP_SLA_64]    = "CC_OP_SLA_64",
419 419
         [CC_OP_FLOGR]     = "CC_OP_FLOGR",
420 420
         [CC_OP_LCBB]      = "CC_OP_LCBB",
421
+        [CC_OP_VC]        = "CC_OP_VC",
421 422
     };
422 423
 
423 424
     return cc_names[cc_op];

+ 66
- 0
target/s390x/helper.h View File

@@ -145,6 +145,72 @@ DEF_HELPER_5(gvec_vpkls_cc64, void, ptr, cptr, cptr, env, i32)
145 145
 DEF_HELPER_FLAGS_5(gvec_vperm, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
146 146
 DEF_HELPER_FLAGS_4(vstl, TCG_CALL_NO_WG, void, env, cptr, i64, i64)
147 147
 
148
+/* === Vector Integer Instructions === */
149
+DEF_HELPER_FLAGS_4(gvec_vavg8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
150
+DEF_HELPER_FLAGS_4(gvec_vavg16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
151
+DEF_HELPER_FLAGS_4(gvec_vavgl8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
152
+DEF_HELPER_FLAGS_4(gvec_vavgl16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
153
+DEF_HELPER_FLAGS_3(gvec_vclz8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
154
+DEF_HELPER_FLAGS_3(gvec_vclz16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
155
+DEF_HELPER_FLAGS_3(gvec_vctz8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
156
+DEF_HELPER_FLAGS_3(gvec_vctz16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
157
+DEF_HELPER_FLAGS_4(gvec_vgfm8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
158
+DEF_HELPER_FLAGS_4(gvec_vgfm16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
159
+DEF_HELPER_FLAGS_4(gvec_vgfm32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
160
+DEF_HELPER_FLAGS_4(gvec_vgfm64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
161
+DEF_HELPER_FLAGS_5(gvec_vgfma8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
162
+DEF_HELPER_FLAGS_5(gvec_vgfma16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
163
+DEF_HELPER_FLAGS_5(gvec_vgfma32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
164
+DEF_HELPER_FLAGS_5(gvec_vgfma64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
165
+DEF_HELPER_FLAGS_5(gvec_vmal8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
166
+DEF_HELPER_FLAGS_5(gvec_vmal16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
167
+DEF_HELPER_FLAGS_5(gvec_vmah8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
168
+DEF_HELPER_FLAGS_5(gvec_vmah16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
169
+DEF_HELPER_FLAGS_5(gvec_vmalh8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
170
+DEF_HELPER_FLAGS_5(gvec_vmalh16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
171
+DEF_HELPER_FLAGS_5(gvec_vmae8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
172
+DEF_HELPER_FLAGS_5(gvec_vmae16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
173
+DEF_HELPER_FLAGS_5(gvec_vmae32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
174
+DEF_HELPER_FLAGS_5(gvec_vmale8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
175
+DEF_HELPER_FLAGS_5(gvec_vmale16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
176
+DEF_HELPER_FLAGS_5(gvec_vmale32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
177
+DEF_HELPER_FLAGS_5(gvec_vmao8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
178
+DEF_HELPER_FLAGS_5(gvec_vmao16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
179
+DEF_HELPER_FLAGS_5(gvec_vmao32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
180
+DEF_HELPER_FLAGS_5(gvec_vmalo8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
181
+DEF_HELPER_FLAGS_5(gvec_vmalo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
182
+DEF_HELPER_FLAGS_5(gvec_vmalo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
183
+DEF_HELPER_FLAGS_4(gvec_vmh8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
184
+DEF_HELPER_FLAGS_4(gvec_vmh16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
185
+DEF_HELPER_FLAGS_4(gvec_vmlh8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
186
+DEF_HELPER_FLAGS_4(gvec_vmlh16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
187
+DEF_HELPER_FLAGS_4(gvec_vme8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
188
+DEF_HELPER_FLAGS_4(gvec_vme16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
189
+DEF_HELPER_FLAGS_4(gvec_vme32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
190
+DEF_HELPER_FLAGS_4(gvec_vmle8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
191
+DEF_HELPER_FLAGS_4(gvec_vmle16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
192
+DEF_HELPER_FLAGS_4(gvec_vmle32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
193
+DEF_HELPER_FLAGS_4(gvec_vmo8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
194
+DEF_HELPER_FLAGS_4(gvec_vmo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
195
+DEF_HELPER_FLAGS_4(gvec_vmo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
196
+DEF_HELPER_FLAGS_4(gvec_vmlo8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
197
+DEF_HELPER_FLAGS_4(gvec_vmlo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
198
+DEF_HELPER_FLAGS_4(gvec_vmlo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
199
+DEF_HELPER_FLAGS_3(gvec_vpopct8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
200
+DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
201
+DEF_HELPER_FLAGS_4(gvec_verllv8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
202
+DEF_HELPER_FLAGS_4(gvec_verllv16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
203
+DEF_HELPER_FLAGS_4(gvec_verll8, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
204
+DEF_HELPER_FLAGS_4(gvec_verll16, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
205
+DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
206
+DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
207
+DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
208
+DEF_HELPER_FLAGS_4(gvec_vsra, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
209
+DEF_HELPER_FLAGS_4(gvec_vsrl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
210
+DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
211
+DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
212
+DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
213
+
148 214
 #ifndef CONFIG_USER_ONLY
149 215
 DEF_HELPER_3(servc, i32, env, i64, i64)
150 216
 DEF_HELPER_4(diag, void, env, i32, i32, i32)

+ 137
- 0
target/s390x/insn-data.def View File

@@ -1054,6 +1054,143 @@
1054 1054
 /* VECTOR UNPACK LOGICAL LOW */
1055 1055
     F(0xe7d4, VUPLL,   VRR_a, V,   0, 0, 0, 0, vup, 0, IF_VEC)
1056 1056
 
1057
+/* === Vector Integer Instructions === */
1058
+
1059
+/* VECTOR ADD */
1060
+    F(0xe7f3, VA,      VRR_c, V,   0, 0, 0, 0, va, 0, IF_VEC)
1061
+/* VECTOR ADD COMPUTE CARRY */
1062
+    F(0xe7f1, VACC,    VRR_c, V,   0, 0, 0, 0, vacc, 0, IF_VEC)
1063
+/* VECTOR ADD WITH CARRY */
1064
+    F(0xe7bb, VAC,     VRR_d, V,   0, 0, 0, 0, vac, 0, IF_VEC)
1065
+/* VECTOR ADD WITH CARRY COMPUTE CARRY */
1066
+    F(0xe7b9, VACCC,   VRR_d, V,   0, 0, 0, 0, vaccc, 0, IF_VEC)
1067
+/* VECTOR AND */
1068
+    F(0xe768, VN,      VRR_c, V,   0, 0, 0, 0, vn, 0, IF_VEC)
1069
+/* VECTOR AND WITH COMPLEMENT */
1070
+    F(0xe769, VNC,     VRR_c, V,   0, 0, 0, 0, vnc, 0, IF_VEC)
1071
+/* VECTOR AVERAGE */
1072
+    F(0xe7f2, VAVG,    VRR_c, V,   0, 0, 0, 0, vavg, 0, IF_VEC)
1073
+/* VECTOR AVERAGE LOGICAL */
1074
+    F(0xe7f0, VAVGL,   VRR_c, V,   0, 0, 0, 0, vavgl, 0, IF_VEC)
1075
+/* VECTOR CHECKSUM */
1076
+    F(0xe766, VCKSM,   VRR_c, V,   0, 0, 0, 0, vcksm, 0, IF_VEC)
1077
+/* VECTOR ELEMENT COMPARE */
1078
+    F(0xe7db, VEC,     VRR_a, V,   0, 0, 0, 0, vec, cmps64, IF_VEC)
1079
+/* VECTOR ELEMENT COMPARE LOGICAL */
1080
+    F(0xe7d9, VECL,    VRR_a, V,   0, 0, 0, 0, vec, cmpu64, IF_VEC)
1081
+/* VECTOR COMPARE EQUAL */
1082
+    E(0xe7f8, VCEQ,    VRR_b, V,   0, 0, 0, 0, vc, 0, TCG_COND_EQ, IF_VEC)
1083
+/* VECTOR COMPARE HIGH */
1084
+    E(0xe7fb, VCH,     VRR_b, V,   0, 0, 0, 0, vc, 0, TCG_COND_GT, IF_VEC)
1085
+/* VECTOR COMPARE HIGH LOGICAL */
1086
+    E(0xe7f9, VCHL,    VRR_b, V,   0, 0, 0, 0, vc, 0, TCG_COND_GTU, IF_VEC)
1087
+/* VECTOR COUNT LEADING ZEROS */
1088
+    F(0xe753, VCLZ,    VRR_a, V,   0, 0, 0, 0, vclz, 0, IF_VEC)
1089
+/* VECTOR COUNT TRAILING ZEROS */
1090
+    F(0xe752, VCTZ,    VRR_a, V,   0, 0, 0, 0, vctz, 0, IF_VEC)
1091
+/* VECTOR EXCLUSIVE OR */
1092
+    F(0xe76d, VX,      VRR_c, V,   0, 0, 0, 0, vx, 0, IF_VEC)
1093
+/* VECTOR GALOIS FIELD MULTIPLY SUM */
1094
+    F(0xe7b4, VGFM,    VRR_c, V,   0, 0, 0, 0, vgfm, 0, IF_VEC)
1095
+/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
1096
+    F(0xe7bc, VGFMA,   VRR_d, V,   0, 0, 0, 0, vgfma, 0, IF_VEC)
1097
+/* VECTOR LOAD COMPLEMENT */
1098
+    F(0xe7de, VLC,     VRR_a, V,   0, 0, 0, 0, vlc, 0, IF_VEC)
1099
+/* VECTOR LOAD POSITIVE */
1100
+    F(0xe7df, VLP,     VRR_a, V,   0, 0, 0, 0, vlp, 0, IF_VEC)
1101
+/* VECTOR MAXIMUM */
1102
+    F(0xe7ff, VMX,     VRR_c, V,   0, 0, 0, 0, vmx, 0, IF_VEC)
1103
+/* VECTOR MAXIMUM LOGICAL */
1104
+    F(0xe7fd, VMXL,    VRR_c, V,   0, 0, 0, 0, vmx, 0, IF_VEC)
1105
+/* VECTOR MINIMUM */
1106
+    F(0xe7fe, VMN,     VRR_c, V,   0, 0, 0, 0, vmx, 0, IF_VEC)
1107
+/* VECTOR MINIMUM LOGICAL */
1108
+    F(0xe7fc, VMNL,    VRR_c, V,   0, 0, 0, 0, vmx, 0, IF_VEC)
1109
+/* VECTOR MULTIPLY AND ADD LOW */
1110
+    F(0xe7aa, VMAL,    VRR_d, V,   0, 0, 0, 0, vma, 0, IF_VEC)
1111
+/* VECTOR MULTIPLY AND ADD HIGH */
1112
+    F(0xe7ab, VMAH,    VRR_d, V,   0, 0, 0, 0, vma, 0, IF_VEC)
1113
+/* VECTOR MULTIPLY AND ADD LOGICAL HIGH */
1114
+    F(0xe7a9, VMALH,   VRR_d, V,   0, 0, 0, 0, vma, 0, IF_VEC)
1115
+/* VECTOR MULTIPLY AND ADD EVEN */
1116
+    F(0xe7ae, VMAE,    VRR_d, V,   0, 0, 0, 0, vma, 0, IF_VEC)
1117
+/* VECTOR MULTIPLY AND ADD LOGICAL EVEN */
1118
+    F(0xe7ac, VMALE,   VRR_d, V,   0, 0, 0, 0, vma, 0, IF_VEC)
1119
+/* VECTOR MULTIPLY AND ADD ODD */
1120
+    F(0xe7af, VMAO,    VRR_d, V,   0, 0, 0, 0, vma, 0, IF_VEC)
1121
+/* VECTOR MULTIPLY AND ADD LOGICAL ODD */
1122
+    F(0xe7ad, VMALO,   VRR_d, V,   0, 0, 0, 0, vma, 0, IF_VEC)
1123
+/* VECTOR MULTIPLY HIGH */
1124
+    F(0xe7a3, VMH,     VRR_c, V,   0, 0, 0, 0, vm, 0, IF_VEC)
1125
+/* VECTOR MULTIPLY LOGICAL HIGH */
1126
+    F(0xe7a1, VMLH,    VRR_c, V,   0, 0, 0, 0, vm, 0, IF_VEC)
1127
+/* VECTOR MULTIPLY LOW */
1128
+    F(0xe7a2, VML,     VRR_c, V,   0, 0, 0, 0, vm, 0, IF_VEC)
1129
+/* VECTOR MULTIPLY EVEN */
1130
+    F(0xe7a6, VME,     VRR_c, V,   0, 0, 0, 0, vm, 0, IF_VEC)
1131
+/* VECTOR MULTIPLY LOGICAL EVEN */
1132
+    F(0xe7a4, VMLE,    VRR_c, V,   0, 0, 0, 0, vm, 0, IF_VEC)
1133
+/* VECTOR MULTIPLY ODD */
1134
+    F(0xe7a7, VMO,     VRR_c, V,   0, 0, 0, 0, vm, 0, IF_VEC)
1135
+/* VECTOR MULTIPLY LOGICAL ODD */
1136
+    F(0xe7a5, VMLO,    VRR_c, V,   0, 0, 0, 0, vm, 0, IF_VEC)
1137
+/* VECTOR NAND */
1138
+    F(0xe76e, VNN,     VRR_c, VE,  0, 0, 0, 0, vnn, 0, IF_VEC)
1139
+/* VECTOR NOR */
1140
+    F(0xe76b, VNO,     VRR_c, V,   0, 0, 0, 0, vno, 0, IF_VEC)
1141
+/* VECTOR NOT EXCLUSIVE OR */
1142
+    F(0xe76c, VNX,     VRR_c, VE,  0, 0, 0, 0, vnx, 0, IF_VEC)
1143
+/* VECTOR OR */
1144
+    F(0xe76a, VO,      VRR_c, V,   0, 0, 0, 0, vo, 0, IF_VEC)
1145
+/* VECTOR OR WITH COMPLEMENT */
1146
+    F(0xe76f, VOC,     VRR_c, VE,  0, 0, 0, 0, voc, 0, IF_VEC)
1147
+/* VECTOR POPULATION COUNT */
1148
+    F(0xe750, VPOPCT,  VRR_a, V,   0, 0, 0, 0, vpopct, 0, IF_VEC)
1149
+/* VECTOR ELEMENT ROTATE LEFT LOGICAL */
1150
+    F(0xe773, VERLLV,  VRR_c, V,   0, 0, 0, 0, verllv, 0, IF_VEC)
1151
+    F(0xe733, VERLL,   VRS_a, V,   la2, 0, 0, 0, verll, 0, IF_VEC)
1152
+/* VECTOR ELEMENT ROTATE AND INSERT UNDER MASK */
1153
+    F(0xe772, VERIM,   VRI_d, V,   0, 0, 0, 0, verim, 0, IF_VEC)
1154
+/* VECTOR ELEMENT SHIFT LEFT */
1155
+    F(0xe770, VESLV,   VRR_c, V,   0, 0, 0, 0, vesv, 0, IF_VEC)
1156
+    F(0xe730, VESL,    VRS_a, V,   la2, 0, 0, 0, ves, 0, IF_VEC)
1157
+/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */
1158
+    F(0xe77a, VESRAV,  VRR_c, V,   0, 0, 0, 0, vesv, 0, IF_VEC)
1159
+    F(0xe73a, VESRA,   VRS_a, V,   la2, 0, 0, 0, ves, 0, IF_VEC)
1160
+/* VECTOR ELEMENT SHIFT RIGHT LOGICAL */
1161
+    F(0xe778, VESRLV,  VRR_c, V,   0, 0, 0, 0, vesv, 0, IF_VEC)
1162
+    F(0xe738, VESRL,   VRS_a, V,   la2, 0, 0, 0, ves, 0, IF_VEC)
1163
+/* VECTOR SHIFT LEFT */
1164
+    F(0xe774, VSL,     VRR_c, V,   0, 0, 0, 0, vsl, 0, IF_VEC)
1165
+/* VECTOR SHIFT LEFT BY BYTE */
1166
+    F(0xe775, VSLB,    VRR_c, V,   0, 0, 0, 0, vsl, 0, IF_VEC)
1167
+/* VECTOR SHIFT LEFT DOUBLE BY BYTE */
1168
+    F(0xe777, VSLDB,   VRI_d, V,   0, 0, 0, 0, vsldb, 0, IF_VEC)
1169
+/* VECTOR SHIFT RIGHT ARITHMETIC */
1170
+    F(0xe77e, VSRA,    VRR_c, V,   0, 0, 0, 0, vsra, 0, IF_VEC)
1171
+/* VECTOR SHIFT RIGHT ARITHMETIC BY BYTE */
1172
+    F(0xe77f, VSRAB,   VRR_c, V,   0, 0, 0, 0, vsra, 0, IF_VEC)
1173
+/* VECTOR SHIFT RIGHT LOGICAL */
1174
+    F(0xe77c, VSRL,    VRR_c, V,   0, 0, 0, 0, vsrl, 0, IF_VEC)
1175
+/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
1176
+    F(0xe77d, VSRLB,   VRR_c, V,   0, 0, 0, 0, vsrl, 0, IF_VEC)
1177
+/* VECTOR SUBTRACT */
1178
+    F(0xe7f7, VS,      VRR_c, V,   0, 0, 0, 0, vs, 0, IF_VEC)
1179
+/* VECTOR SUBTRACT COMPUTE BORROW INDICATION */
1180
+    F(0xe7f5, VSCBI,   VRR_c, V,   0, 0, 0, 0, vscbi, 0, IF_VEC)
1181
+/* VECTOR SUBTRACT WITH BORROW INDICATION */
1182
+    F(0xe7bf, VSBI,    VRR_d, V,   0, 0, 0, 0, vsbi, 0, IF_VEC)
1183
+/* VECTOR SUBTRACT WITH BORROW COMPUTE BORROW INDICATION */
1184
+    F(0xe7bd, VSBCBI,  VRR_d, V,   0, 0, 0, 0, vsbcbi, 0, IF_VEC)
1185
+/* VECTOR SUM ACROSS DOUBLEWORD */
1186
+    F(0xe765, VSUMG,   VRR_c, V,   0, 0, 0, 0, vsumg, 0, IF_VEC)
1187
+/* VECTOR SUM ACROSS QUADWORD */
1188
+    F(0xe767, VSUMQ,   VRR_c, V,   0, 0, 0, 0, vsumq, 0, IF_VEC)
1189
+/* VECTOR SUM ACROSS WORD */
1190
+    F(0xe764, VSUM,    VRR_c, V,   0, 0, 0, 0, vsum, 0, IF_VEC)
1191
+/* VECTOR TEST UNDER MASK */
1192
+    F(0xe7d8, VTM,     VRR_a, V,   0, 0, 0, 0, vtm, 0, IF_VEC)
1193
+
1057 1194
 #ifndef CONFIG_USER_ONLY
1058 1195
 /* COMPARE AND SWAP AND PURGE */
1059 1196
     E(0xb250, CSP,     RRE,   Z,   r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV)

+ 1
- 0
target/s390x/internal.h View File

@@ -200,6 +200,7 @@ enum cc_op {
200 200
     CC_OP_SLA_64,               /* Calculate shift left signed (64bit) */
201 201
     CC_OP_FLOGR,                /* find leftmost one */
202 202
     CC_OP_LCBB,                 /* load count to block boundary */
203
+    CC_OP_VC,                   /* vector compare result */
203 204
     CC_OP_MAX
204 205
 };
205 206
 

+ 2
- 0
target/s390x/translate.c View File

@@ -572,6 +572,7 @@ static void gen_op_calc_cc(DisasContext *s)
572 572
     case CC_OP_SLA_32:
573 573
     case CC_OP_SLA_64:
574 574
     case CC_OP_NZ_F128:
575
+    case CC_OP_VC:
575 576
         /* 2 arguments */
576 577
         gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, dummy);
577 578
         break;
@@ -6092,6 +6093,7 @@ enum DisasInsnEnum {
6092 6093
 #define FAC_PCI         S390_FEAT_ZPCI /* z/PCI facility */
6093 6094
 #define FAC_AIS         S390_FEAT_ADAPTER_INT_SUPPRESSION
6094 6095
 #define FAC_V           S390_FEAT_VECTOR /* vector facility */
6096
+#define FAC_VE          S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 */
6095 6097
 
6096 6098
 static const DisasInsn insn_info[] = {
6097 6099
 #include "insn-data.def"

+ 1420
- 0
target/s390x/translate_vx.inc.c View File

@@ -90,6 +90,33 @@ static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
90 90
     }
91 91
 }
92 92
 
93
+static void read_vec_element_i32(TCGv_i32 dst, uint8_t reg, uint8_t enr,
94
+                                 TCGMemOp memop)
95
+{
96
+    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
97
+
98
+    switch (memop) {
99
+    case ES_8:
100
+        tcg_gen_ld8u_i32(dst, cpu_env, offs);
101
+        break;
102
+    case ES_16:
103
+        tcg_gen_ld16u_i32(dst, cpu_env, offs);
104
+        break;
105
+    case ES_8 | MO_SIGN:
106
+        tcg_gen_ld8s_i32(dst, cpu_env, offs);
107
+        break;
108
+    case ES_16 | MO_SIGN:
109
+        tcg_gen_ld16s_i32(dst, cpu_env, offs);
110
+        break;
111
+    case ES_32:
112
+    case ES_32 | MO_SIGN:
113
+        tcg_gen_ld_i32(dst, cpu_env, offs);
114
+        break;
115
+    default:
116
+        g_assert_not_reached();
117
+    }
118
+}
119
+
93 120
 static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
94 121
                                   TCGMemOp memop)
95 122
 {
@@ -113,6 +140,25 @@ static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
113 140
     }
114 141
 }
115 142
 
143
+static void write_vec_element_i32(TCGv_i32 src, int reg, uint8_t enr,
144
+                                  TCGMemOp memop)
145
+{
146
+    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
147
+
148
+    switch (memop) {
149
+    case ES_8:
150
+        tcg_gen_st8_i32(src, cpu_env, offs);
151
+        break;
152
+    case ES_16:
153
+        tcg_gen_st16_i32(src, cpu_env, offs);
154
+        break;
155
+    case ES_32:
156
+        tcg_gen_st_i32(src, cpu_env, offs);
157
+        break;
158
+    default:
159
+        g_assert_not_reached();
160
+    }
161
+}
116 162
 
117 163
 static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
118 164
                                     uint8_t es)
@@ -136,12 +182,30 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
136 182
     tcg_temp_free_i64(tmp);
137 183
 }
138 184
 
185
+#define gen_gvec_2(v1, v2, gen) \
186
+    tcg_gen_gvec_2(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
187
+                   16, 16, gen)
188
+#define gen_gvec_2s(v1, v2, c, gen) \
189
+    tcg_gen_gvec_2s(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
190
+                    16, 16, c, gen)
191
+#define gen_gvec_2i_ool(v1, v2, c, data, fn) \
192
+    tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
193
+                        c, 16, 16, data, fn)
194
+#define gen_gvec_2_ptr(v1, v2, ptr, data, fn) \
195
+    tcg_gen_gvec_2_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
196
+                       ptr, 16, 16, data, fn)
197
+#define gen_gvec_3(v1, v2, v3, gen) \
198
+    tcg_gen_gvec_3(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
199
+                   vec_full_reg_offset(v3), 16, 16, gen)
139 200
 #define gen_gvec_3_ool(v1, v2, v3, data, fn) \
140 201
     tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
141 202
                        vec_full_reg_offset(v3), 16, 16, data, fn)
142 203
 #define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \
143 204
     tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
144 205
                        vec_full_reg_offset(v3), ptr, 16, 16, data, fn)
206
+#define gen_gvec_3i(v1, v2, v3, c, gen) \
207
+    tcg_gen_gvec_3i(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
208
+                    vec_full_reg_offset(v3), c, 16, 16, gen)
145 209
 #define gen_gvec_4(v1, v2, v3, v4, gen) \
146 210
     tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
147 211
                    vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
@@ -157,6 +221,85 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
157 221
                      16)
158 222
 #define gen_gvec_dup64i(v1, c) \
159 223
     tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c)
224
+#define gen_gvec_fn_2(fn, es, v1, v2) \
225
+    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
226
+                      16, 16)
227
+#define gen_gvec_fn_2i(fn, es, v1, v2, c) \
228
+    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
229
+                      c, 16, 16)
230
+#define gen_gvec_fn_2s(fn, es, v1, v2, s) \
231
+    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
232
+                      s, 16, 16)
233
+#define gen_gvec_fn_3(fn, es, v1, v2, v3) \
234
+    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
235
+                      vec_full_reg_offset(v3), 16, 16)
236
+
237
+/*
238
+ * Helper to carry out a 128 bit vector computation using 2 i64 values per
239
+ * vector.
240
+ */
241
+typedef void (*gen_gvec128_3_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
242
+                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
243
+static void gen_gvec128_3_i64(gen_gvec128_3_i64_fn fn, uint8_t d, uint8_t a,
244
+                              uint8_t b)
245
+{
246
+        TCGv_i64 dh = tcg_temp_new_i64();
247
+        TCGv_i64 dl = tcg_temp_new_i64();
248
+        TCGv_i64 ah = tcg_temp_new_i64();
249
+        TCGv_i64 al = tcg_temp_new_i64();
250
+        TCGv_i64 bh = tcg_temp_new_i64();
251
+        TCGv_i64 bl = tcg_temp_new_i64();
252
+
253
+        read_vec_element_i64(ah, a, 0, ES_64);
254
+        read_vec_element_i64(al, a, 1, ES_64);
255
+        read_vec_element_i64(bh, b, 0, ES_64);
256
+        read_vec_element_i64(bl, b, 1, ES_64);
257
+        fn(dl, dh, al, ah, bl, bh);
258
+        write_vec_element_i64(dh, d, 0, ES_64);
259
+        write_vec_element_i64(dl, d, 1, ES_64);
260
+
261
+        tcg_temp_free_i64(dh);
262
+        tcg_temp_free_i64(dl);
263
+        tcg_temp_free_i64(ah);
264
+        tcg_temp_free_i64(al);
265
+        tcg_temp_free_i64(bh);
266
+        tcg_temp_free_i64(bl);
267
+}
268
+
269
+typedef void (*gen_gvec128_4_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
270
+                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh,
271
+                                     TCGv_i64 cl, TCGv_i64 ch);
272
+static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a,
273
+                              uint8_t b, uint8_t c)
274
+{
275
+        TCGv_i64 dh = tcg_temp_new_i64();
276
+        TCGv_i64 dl = tcg_temp_new_i64();
277
+        TCGv_i64 ah = tcg_temp_new_i64();
278
+        TCGv_i64 al = tcg_temp_new_i64();
279
+        TCGv_i64 bh = tcg_temp_new_i64();
280
+        TCGv_i64 bl = tcg_temp_new_i64();
281
+        TCGv_i64 ch = tcg_temp_new_i64();
282
+        TCGv_i64 cl = tcg_temp_new_i64();
283
+
284
+        read_vec_element_i64(ah, a, 0, ES_64);
285
+        read_vec_element_i64(al, a, 1, ES_64);
286
+        read_vec_element_i64(bh, b, 0, ES_64);
287
+        read_vec_element_i64(bl, b, 1, ES_64);
288
+        read_vec_element_i64(ch, c, 0, ES_64);
289
+        read_vec_element_i64(cl, c, 1, ES_64);
290
+        fn(dl, dh, al, ah, bl, bh, cl, ch);
291
+        write_vec_element_i64(dh, d, 0, ES_64);
292
+        write_vec_element_i64(dl, d, 1, ES_64);
293
+
294
+        tcg_temp_free_i64(dh);
295
+        tcg_temp_free_i64(dl);
296
+        tcg_temp_free_i64(ah);
297
+        tcg_temp_free_i64(al);
298
+        tcg_temp_free_i64(bh);
299
+        tcg_temp_free_i64(bl);
300
+        tcg_temp_free_i64(ch);
301
+        tcg_temp_free_i64(cl);
302
+}
160 303
 
161 304
 static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c)
162 305
 {
@@ -183,6 +326,17 @@ static void zero_vec(uint8_t reg)
183 326
     tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
184 327
 }
185 328
 
329
+static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
330
+                          uint64_t b)
331
+{
332
+    TCGv_i64 bl = tcg_const_i64(b);
333
+    TCGv_i64 bh = tcg_const_i64(0);
334
+
335
+    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
336
+    tcg_temp_free_i64(bl);
337
+    tcg_temp_free_i64(bh);
338
+}
339
+
186 340
 static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
187 341
 {
188 342
     const uint8_t es = s->insn->data;
@@ -933,3 +1087,1269 @@ static DisasJumpType op_vup(DisasContext *s, DisasOps *o)
933 1087
     tcg_temp_free_i64(tmp);
934 1088
     return DISAS_NEXT;
935 1089
 }
1090
+
1091
+static DisasJumpType op_va(DisasContext *s, DisasOps *o)
1092
+{
1093
+    const uint8_t es = get_field(s->fields, m4);
1094
+
1095
+    if (es > ES_128) {
1096
+        gen_program_exception(s, PGM_SPECIFICATION);
1097
+        return DISAS_NORETURN;
1098
+    } else if (es == ES_128) {
1099
+        gen_gvec128_3_i64(tcg_gen_add2_i64, get_field(s->fields, v1),
1100
+                          get_field(s->fields, v2), get_field(s->fields, v3));
1101
+        return DISAS_NEXT;
1102
+    }
1103
+    gen_gvec_fn_3(add, es, get_field(s->fields, v1), get_field(s->fields, v2),
1104
+                  get_field(s->fields, v3));
1105
+    return DISAS_NEXT;
1106
+}
1107
+
1108
+static void gen_acc(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, uint8_t es)
1109
+{
1110
+    const uint8_t msb_bit_nr = NUM_VEC_ELEMENT_BITS(es) - 1;
1111
+    TCGv_i64 msb_mask = tcg_const_i64(dup_const(es, 1ull << msb_bit_nr));
1112
+    TCGv_i64 t1 = tcg_temp_new_i64();
1113
+    TCGv_i64 t2 = tcg_temp_new_i64();
1114
+    TCGv_i64 t3 = tcg_temp_new_i64();
1115
+
1116
+    /* Calculate the carry into the MSB, ignoring the old MSBs */
1117
+    tcg_gen_andc_i64(t1, a, msb_mask);
1118
+    tcg_gen_andc_i64(t2, b, msb_mask);
1119
+    tcg_gen_add_i64(t1, t1, t2);
1120
+    /* Calculate the MSB without any carry into it */
1121
+    tcg_gen_xor_i64(t3, a, b);
1122
+    /* Calculate the carry out of the MSB in the MSB bit position */
1123
+    tcg_gen_and_i64(d, a, b);
1124
+    tcg_gen_and_i64(t1, t1, t3);
1125
+    tcg_gen_or_i64(d, d, t1);
1126
+    /* Isolate and shift the carry into position */
1127
+    tcg_gen_and_i64(d, d, msb_mask);
1128
+    tcg_gen_shri_i64(d, d, msb_bit_nr);
1129
+
1130
+    tcg_temp_free_i64(t1);
1131
+    tcg_temp_free_i64(t2);
1132
+    tcg_temp_free_i64(t3);
1133
+}
1134
+
1135
+static void gen_acc8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1136
+{
1137
+    gen_acc(d, a, b, ES_8);
1138
+}
1139
+
1140
+static void gen_acc16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1141
+{
1142
+    gen_acc(d, a, b, ES_16);
1143
+}
1144
+
1145
+static void gen_acc_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1146
+{
1147
+    TCGv_i32 t = tcg_temp_new_i32();
1148
+
1149
+    tcg_gen_add_i32(t, a, b);
1150
+    tcg_gen_setcond_i32(TCG_COND_LTU, d, t, b);
1151
+    tcg_temp_free_i32(t);
1152
+}
1153
+
1154
+static void gen_acc_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1155
+{
1156
+    TCGv_i64 t = tcg_temp_new_i64();
1157
+
1158
+    tcg_gen_add_i64(t, a, b);
1159
+    tcg_gen_setcond_i64(TCG_COND_LTU, d, t, b);
1160
+    tcg_temp_free_i64(t);
1161
+}
1162
+
1163
+static void gen_acc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
1164
+                         TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
1165
+{
1166
+    TCGv_i64 th = tcg_temp_new_i64();
1167
+    TCGv_i64 tl = tcg_temp_new_i64();
1168
+    TCGv_i64 zero = tcg_const_i64(0);
1169
+
1170
+    tcg_gen_add2_i64(tl, th, al, zero, bl, zero);
1171
+    tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
1172
+    tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
1173
+    tcg_gen_mov_i64(dh, zero);
1174
+
1175
+    tcg_temp_free_i64(th);
1176
+    tcg_temp_free_i64(tl);
1177
+    tcg_temp_free_i64(zero);
1178
+}
1179
+
1180
+static DisasJumpType op_vacc(DisasContext *s, DisasOps *o)
1181
+{
1182
+    const uint8_t es = get_field(s->fields, m4);
1183
+    static const GVecGen3 g[4] = {
1184
+        { .fni8 = gen_acc8_i64, },
1185
+        { .fni8 = gen_acc16_i64, },
1186
+        { .fni4 = gen_acc_i32, },
1187
+        { .fni8 = gen_acc_i64, },
1188
+    };
1189
+
1190
+    if (es > ES_128) {
1191
+        gen_program_exception(s, PGM_SPECIFICATION);
1192
+        return DISAS_NORETURN;
1193
+    } else if (es == ES_128) {
1194
+        gen_gvec128_3_i64(gen_acc2_i64, get_field(s->fields, v1),
1195
+                          get_field(s->fields, v2), get_field(s->fields, v3));
1196
+        return DISAS_NEXT;
1197
+    }
1198
+    gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
1199
+               get_field(s->fields, v3), &g[es]);
1200
+    return DISAS_NEXT;
1201
+}
1202
+
1203
+static void gen_ac2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
1204
+                        TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
1205
+{
1206
+    TCGv_i64 tl = tcg_temp_new_i64();
1207
+    TCGv_i64 th = tcg_const_i64(0);
1208
+
1209
+    /* extract the carry only */
1210
+    tcg_gen_extract_i64(tl, cl, 0, 1);
1211
+    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
1212
+    tcg_gen_add2_i64(dl, dh, dl, dh, tl, th);
1213
+
1214
+    tcg_temp_free_i64(tl);
1215
+    tcg_temp_free_i64(th);
1216
+}
1217
+
1218
+static DisasJumpType op_vac(DisasContext *s, DisasOps *o)
1219
+{
1220
+    if (get_field(s->fields, m5) != ES_128) {
1221
+        gen_program_exception(s, PGM_SPECIFICATION);
1222
+        return DISAS_NORETURN;
1223
+    }
1224
+
1225
+    gen_gvec128_4_i64(gen_ac2_i64, get_field(s->fields, v1),
1226
+                      get_field(s->fields, v2), get_field(s->fields, v3),
1227
+                      get_field(s->fields, v4));
1228
+    return DISAS_NEXT;
1229
+}
1230
+
1231
+static void gen_accc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
1232
+                          TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
1233
+{
1234
+    TCGv_i64 tl = tcg_temp_new_i64();
1235
+    TCGv_i64 th = tcg_temp_new_i64();
1236
+    TCGv_i64 zero = tcg_const_i64(0);
1237
+
1238
+    tcg_gen_andi_i64(tl, cl, 1);
1239
+    tcg_gen_add2_i64(tl, th, tl, zero, al, zero);
1240
+    tcg_gen_add2_i64(tl, th, tl, th, bl, zero);
1241
+    tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
1242
+    tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
1243
+    tcg_gen_mov_i64(dh, zero);
1244
+
1245
+    tcg_temp_free_i64(tl);
1246
+    tcg_temp_free_i64(th);
1247
+    tcg_temp_free_i64(zero);
1248
+}
1249
+
1250
+static DisasJumpType op_vaccc(DisasContext *s, DisasOps *o)
1251
+{
1252
+    if (get_field(s->fields, m5) != ES_128) {
1253
+        gen_program_exception(s, PGM_SPECIFICATION);
1254
+        return DISAS_NORETURN;
1255
+    }
1256
+
1257
+    gen_gvec128_4_i64(gen_accc2_i64, get_field(s->fields, v1),
1258
+                      get_field(s->fields, v2), get_field(s->fields, v3),
1259
+                      get_field(s->fields, v4));
1260
+    return DISAS_NEXT;
1261
+}
1262
+
1263
+static DisasJumpType op_vn(DisasContext *s, DisasOps *o)
1264
+{
1265
+    gen_gvec_fn_3(and, ES_8, get_field(s->fields, v1), get_field(s->fields, v2),
1266
+                  get_field(s->fields, v3));
1267
+    return DISAS_NEXT;
1268
+}
1269
+
1270
+static DisasJumpType op_vnc(DisasContext *s, DisasOps *o)
1271
+{
1272
+    gen_gvec_fn_3(andc, ES_8, get_field(s->fields, v1),
1273
+                  get_field(s->fields, v2), get_field(s->fields, v3));
1274
+    return DISAS_NEXT;
1275
+}
1276
+
1277
+static void gen_avg_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1278
+{
1279
+    TCGv_i64 t0 = tcg_temp_new_i64();
1280
+    TCGv_i64 t1 = tcg_temp_new_i64();
1281
+
1282
+    tcg_gen_ext_i32_i64(t0, a);
1283
+    tcg_gen_ext_i32_i64(t1, b);
1284
+    tcg_gen_add_i64(t0, t0, t1);
1285
+    tcg_gen_addi_i64(t0, t0, 1);
1286
+    tcg_gen_shri_i64(t0, t0, 1);
1287
+    tcg_gen_extrl_i64_i32(d, t0);
1288
+
1289
+    tcg_temp_free(t0);
1290
+    tcg_temp_free(t1);
1291
+}
1292
+
1293
+static void gen_avg_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl)
1294
+{
1295
+    TCGv_i64 dh = tcg_temp_new_i64();
1296
+    TCGv_i64 ah = tcg_temp_new_i64();
1297
+    TCGv_i64 bh = tcg_temp_new_i64();
1298
+
1299
+    /* extending the sign by one bit is sufficient */
1300
+    tcg_gen_extract_i64(ah, al, 63, 1);
1301
+    tcg_gen_extract_i64(bh, bl, 63, 1);
1302
+    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
1303
+    gen_addi2_i64(dl, dh, dl, dh, 1);
1304
+    tcg_gen_extract2_i64(dl, dl, dh, 1);
1305
+
1306
+    tcg_temp_free_i64(dh);
1307
+    tcg_temp_free_i64(ah);
1308
+    tcg_temp_free_i64(bh);
1309
+}
1310
+
1311
+static DisasJumpType op_vavg(DisasContext *s, DisasOps *o)
1312
+{
1313
+    const uint8_t es = get_field(s->fields, m4);
1314
+    static const GVecGen3 g[4] = {
1315
+        { .fno = gen_helper_gvec_vavg8, },
1316
+        { .fno = gen_helper_gvec_vavg16, },
1317
+        { .fni4 = gen_avg_i32, },
1318
+        { .fni8 = gen_avg_i64, },
1319
+    };
1320
+
1321
+    if (es > ES_64) {
1322
+        gen_program_exception(s, PGM_SPECIFICATION);
1323
+        return DISAS_NORETURN;
1324
+    }
1325
+    gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
1326
+               get_field(s->fields, v3), &g[es]);
1327
+    return DISAS_NEXT;
1328
+}
1329
+
1330
+static void gen_avgl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1331
+{
1332
+    TCGv_i64 t0 = tcg_temp_new_i64();
1333
+    TCGv_i64 t1 = tcg_temp_new_i64();
1334
+
1335
+    tcg_gen_extu_i32_i64(t0, a);
1336
+    tcg_gen_extu_i32_i64(t1, b);
1337
+    tcg_gen_add_i64(t0, t0, t1);
1338
+    tcg_gen_addi_i64(t0, t0, 1);
1339
+    tcg_gen_shri_i64(t0, t0, 1);
1340
+    tcg_gen_extrl_i64_i32(d, t0);
1341
+
1342
+    tcg_temp_free(t0);
1343
+    tcg_temp_free(t1);
1344
+}
1345
+
1346
+static void gen_avgl_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl)
1347
+{
1348
+    TCGv_i64 dh = tcg_temp_new_i64();
1349
+    TCGv_i64 zero = tcg_const_i64(0);
1350
+
1351
+    tcg_gen_add2_i64(dl, dh, al, zero, bl, zero);
1352
+    gen_addi2_i64(dl, dh, dl, dh, 1);
1353
+    tcg_gen_extract2_i64(dl, dl, dh, 1);
1354
+
1355
+    tcg_temp_free_i64(dh);
1356
+    tcg_temp_free_i64(zero);
1357
+}
1358
+
1359
+static DisasJumpType op_vavgl(DisasContext *s, DisasOps *o)
1360
+{
1361
+    const uint8_t es = get_field(s->fields, m4);
1362
+    static const GVecGen3 g[4] = {
1363
+        { .fno = gen_helper_gvec_vavgl8, },
1364
+        { .fno = gen_helper_gvec_vavgl16, },
1365
+        { .fni4 = gen_avgl_i32, },
1366
+        { .fni8 = gen_avgl_i64, },
1367
+    };
1368
+
1369
+    if (es > ES_64) {
1370
+        gen_program_exception(s, PGM_SPECIFICATION);
1371
+        return DISAS_NORETURN;
1372
+    }
1373
+    gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
1374
+               get_field(s->fields, v3), &g[es]);
1375
+    return DISAS_NEXT;
1376
+}
1377
+
1378
+static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o)
1379
+{
1380
+    TCGv_i32 tmp = tcg_temp_new_i32();
1381
+    TCGv_i32 sum = tcg_temp_new_i32();
1382
+    int i;
1383
+
1384
+    read_vec_element_i32(sum, get_field(s->fields, v3), 1, ES_32);
1385
+    for (i = 0; i < 4; i++) {
1386
+        read_vec_element_i32(tmp, get_field(s->fields, v2), i, ES_32);
1387
+        tcg_gen_add2_i32(tmp, sum, sum, sum, tmp, tmp);
1388
+    }
1389
+    zero_vec(get_field(s->fields, v1));
1390
+    write_vec_element_i32(sum, get_field(s->fields, v1), 1, ES_32);
1391
+
1392
+    tcg_temp_free_i32(tmp);
1393
+    tcg_temp_free_i32(sum);
1394
+    return DISAS_NEXT;
1395
+}
1396
+
1397
+static DisasJumpType op_vec(DisasContext *s, DisasOps *o)
1398
+{
1399
+    uint8_t es = get_field(s->fields, m3);
1400
+    const uint8_t enr = NUM_VEC_ELEMENTS(es) / 2 - 1;
1401
+
1402
+    if (es > ES_64) {
1403
+        gen_program_exception(s, PGM_SPECIFICATION);
1404
+        return DISAS_NORETURN;
1405
+    }
1406
+    if (s->fields->op2 == 0xdb) {
1407
+        es |= MO_SIGN;
1408
+    }
1409
+
1410
+    o->in1 = tcg_temp_new_i64();
1411
+    o->in2 = tcg_temp_new_i64();
1412
+    read_vec_element_i64(o->in1, get_field(s->fields, v1), enr, es);
1413
+    read_vec_element_i64(o->in2, get_field(s->fields, v2), enr, es);
1414
+    return DISAS_NEXT;
1415
+}
1416
+
1417
+static DisasJumpType op_vc(DisasContext *s, DisasOps *o)
1418
+{
1419
+    const uint8_t es = get_field(s->fields, m4);
1420
+    TCGCond cond = s->insn->data;
1421
+
1422
+    if (es > ES_64) {
1423
+        gen_program_exception(s, PGM_SPECIFICATION);
1424
+        return DISAS_NORETURN;
1425
+    }
1426
+
1427
+    tcg_gen_gvec_cmp(cond, es,
1428
+                     vec_full_reg_offset(get_field(s->fields, v1)),
1429
+                     vec_full_reg_offset(get_field(s->fields, v2)),
1430
+                     vec_full_reg_offset(get_field(s->fields, v3)), 16, 16);
1431
+    if (get_field(s->fields, m5) & 0x1) {
1432
+        TCGv_i64 low = tcg_temp_new_i64();
1433
+        TCGv_i64 high = tcg_temp_new_i64();
1434
+
1435
+        read_vec_element_i64(high, get_field(s->fields, v1), 0, ES_64);
1436
+        read_vec_element_i64(low, get_field(s->fields, v1), 1, ES_64);
1437
+        gen_op_update2_cc_i64(s, CC_OP_VC, low, high);
1438
+
1439
+        tcg_temp_free_i64(low);
1440
+        tcg_temp_free_i64(high);
1441
+    }
1442
+    return DISAS_NEXT;
1443
+}
1444
+
1445
+static void gen_clz_i32(TCGv_i32 d, TCGv_i32 a)
1446
+{
1447
+    tcg_gen_clzi_i32(d, a, 32);
1448
+}
1449
+
1450
+static void gen_clz_i64(TCGv_i64 d, TCGv_i64 a)
1451
+{
1452
+    tcg_gen_clzi_i64(d, a, 64);
1453
+}
1454
+
1455
+static DisasJumpType op_vclz(DisasContext *s, DisasOps *o)
1456
+{
1457
+    const uint8_t es = get_field(s->fields, m3);
1458
+    static const GVecGen2 g[4] = {
1459
+        { .fno = gen_helper_gvec_vclz8, },
1460
+        { .fno = gen_helper_gvec_vclz16, },
1461
+        { .fni4 = gen_clz_i32, },
1462
+        { .fni8 = gen_clz_i64, },
1463
+    };
1464
+
1465
+    if (es > ES_64) {
1466
+        gen_program_exception(s, PGM_SPECIFICATION);
1467
+        return DISAS_NORETURN;
1468
+    }
1469
+    gen_gvec_2(get_field(s->fields, v1), get_field(s->fields, v2), &g[es]);
1470
+    return DISAS_NEXT;
1471
+}
1472
+
1473
+static void gen_ctz_i32(TCGv_i32 d, TCGv_i32 a)
1474
+{
1475
+    tcg_gen_ctzi_i32(d, a, 32);
1476
+}
1477
+
1478
+static void gen_ctz_i64(TCGv_i64 d, TCGv_i64 a)
1479
+{
1480
+    tcg_gen_ctzi_i64(d, a, 64);
1481
+}
1482
+
1483
+static DisasJumpType op_vctz(DisasContext *s, DisasOps *o)
1484
+{
1485
+    const uint8_t es = get_field(s->fields, m3);
1486
+    static const GVecGen2 g[4] = {
1487
+        { .fno = gen_helper_gvec_vctz8, },
1488
+        { .fno = gen_helper_gvec_vctz16, },
1489
+        { .fni4 = gen_ctz_i32, },
1490
+        { .fni8 = gen_ctz_i64, },
1491
+    };
1492
+
1493
+    if (es > ES_64) {
1494
+        gen_program_exception(s, PGM_SPECIFICATION);
1495
+        return DISAS_NORETURN;
1496
+    }
1497
+    gen_gvec_2(get_field(s->fields, v1), get_field(s->fields, v2), &g[es]);
1498
+    return DISAS_NEXT;
1499
+}
1500
+
1501
+static DisasJumpType op_vx(DisasContext *s, DisasOps *o)
1502
+{
1503
+    gen_gvec_fn_3(xor, ES_8, get_field(s->fields, v1), get_field(s->fields, v2),
1504
+                 get_field(s->fields, v3));
1505
+    return DISAS_NEXT;
1506
+}
1507
+
1508
+static DisasJumpType op_vgfm(DisasContext *s, DisasOps *o)
1509
+{
1510
+    const uint8_t es = get_field(s->fields, m4);
1511
+    static const GVecGen3 g[4] = {
1512
+        { .fno = gen_helper_gvec_vgfm8, },
1513
+        { .fno = gen_helper_gvec_vgfm16, },
1514
+        { .fno = gen_helper_gvec_vgfm32, },
1515
+        { .fno = gen_helper_gvec_vgfm64, },
1516
+    };
1517
+
1518
+    if (es > ES_64) {
1519
+        gen_program_exception(s, PGM_SPECIFICATION);
1520
+        return DISAS_NORETURN;
1521
+    }
1522
+    gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
1523
+               get_field(s->fields, v3), &g[es]);
1524
+    return DISAS_NEXT;
1525
+}
1526
+
1527
+static DisasJumpType op_vgfma(DisasContext *s, DisasOps *o)
1528
+{
1529
+    const uint8_t es = get_field(s->fields, m5);
1530
+    static const GVecGen4 g[4] = {
1531
+        { .fno = gen_helper_gvec_vgfma8, },
1532
+        { .fno = gen_helper_gvec_vgfma16, },
1533
+        { .fno = gen_helper_gvec_vgfma32, },
1534
+        { .fno = gen_helper_gvec_vgfma64, },
1535
+    };
1536
+
1537
+    if (es > ES_64) {
1538
+        gen_program_exception(s, PGM_SPECIFICATION);
1539
+        return DISAS_NORETURN;
1540
+    }
1541
+    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
1542
+               get_field(s->fields, v3), get_field(s->fields, v4), &g[es]);
1543
+    return DISAS_NEXT;
1544
+}
1545
+
1546
+static DisasJumpType op_vlc(DisasContext *s, DisasOps *o)
1547
+{
1548
+    const uint8_t es = get_field(s->fields, m3);
1549
+
1550
+    if (es > ES_64) {
1551
+        gen_program_exception(s, PGM_SPECIFICATION);
1552
+        return DISAS_NORETURN;
1553
+    }
1554
+
1555
+    gen_gvec_fn_2(neg, es, get_field(s->fields, v1), get_field(s->fields, v2));
1556
+    return DISAS_NEXT;
1557
+}
1558
+
1559
+static DisasJumpType op_vlp(DisasContext *s, DisasOps *o)
1560
+{
1561
+    const uint8_t es = get_field(s->fields, m3);
1562
+
1563
+    if (es > ES_64) {
1564
+        gen_program_exception(s, PGM_SPECIFICATION);
1565
+        return DISAS_NORETURN;
1566
+    }
1567
+
1568
+    gen_gvec_fn_2(abs, es, get_field(s->fields, v1), get_field(s->fields, v2));
1569
+    return DISAS_NEXT;
1570
+}
1571
+
1572
+static DisasJumpType op_vmx(DisasContext *s, DisasOps *o)
1573
+{
1574
+    const uint8_t v1 = get_field(s->fields, v1);
1575
+    const uint8_t v2 = get_field(s->fields, v2);
1576
+    const uint8_t v3 = get_field(s->fields, v3);
1577
+    const uint8_t es = get_field(s->fields, m4);
1578
+
1579
+    if (es > ES_64) {
1580
+        gen_program_exception(s, PGM_SPECIFICATION);
1581
+        return DISAS_NORETURN;
1582
+    }
1583
+
1584
+    switch (s->fields->op2) {
1585
+    case 0xff:
1586
+        gen_gvec_fn_3(smax, es, v1, v2, v3);
1587
+        break;
1588
+    case 0xfd:
1589
+        gen_gvec_fn_3(umax, es, v1, v2, v3);
1590
+        break;
1591
+    case 0xfe:
1592
+        gen_gvec_fn_3(smin, es, v1, v2, v3);
1593
+        break;
1594
+    case 0xfc:
1595
+        gen_gvec_fn_3(umin, es, v1, v2, v3);
1596
+        break;
1597
+    default:
1598
+        g_assert_not_reached();
1599
+    }
1600
+    return DISAS_NEXT;
1601
+}
1602
+
1603
+static void gen_mal_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
1604
+{
1605
+    TCGv_i32 t0 = tcg_temp_new_i32();
1606
+
1607
+    tcg_gen_mul_i32(t0, a, b);
1608
+    tcg_gen_add_i32(d, t0, c);
1609
+
1610
+    tcg_temp_free_i32(t0);
1611
+}
1612
+
1613
+static void gen_mah_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
1614
+{
1615
+    TCGv_i64 t0 = tcg_temp_new_i64();
1616
+    TCGv_i64 t1 = tcg_temp_new_i64();
1617
+    TCGv_i64 t2 = tcg_temp_new_i64();
1618
+
1619
+    tcg_gen_ext_i32_i64(t0, a);
1620
+    tcg_gen_ext_i32_i64(t1, b);
1621
+    tcg_gen_ext_i32_i64(t2, c);
1622
+    tcg_gen_mul_i64(t0, t0, t1);
1623
+    tcg_gen_add_i64(t0, t0, t2);
1624
+    tcg_gen_extrh_i64_i32(d, t0);
1625
+
1626
+    tcg_temp_free(t0);
1627
+    tcg_temp_free(t1);
1628
+    tcg_temp_free(t2);
1629
+}
1630
+
1631
+static void gen_malh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
1632
+{
1633
+    TCGv_i64 t0 = tcg_temp_new_i64();
1634
+    TCGv_i64 t1 = tcg_temp_new_i64();
1635
+    TCGv_i64 t2 = tcg_temp_new_i64();
1636
+
1637
+    tcg_gen_extu_i32_i64(t0, a);
1638
+    tcg_gen_extu_i32_i64(t1, b);
1639
+    tcg_gen_extu_i32_i64(t2, c);
1640
+    tcg_gen_mul_i64(t0, t0, t1);
1641
+    tcg_gen_add_i64(t0, t0, t2);
1642
+    tcg_gen_extrh_i64_i32(d, t0);
1643
+
1644
+    tcg_temp_free(t0);
1645
+    tcg_temp_free(t1);
1646
+    tcg_temp_free(t2);
1647
+}
1648
+
1649
+static DisasJumpType op_vma(DisasContext *s, DisasOps *o)
1650
+{
1651
+    const uint8_t es = get_field(s->fields, m5);
1652
+    static const GVecGen4 g_vmal[3] = {
1653
+        { .fno = gen_helper_gvec_vmal8, },
1654
+        { .fno = gen_helper_gvec_vmal16, },
1655
+        { .fni4 = gen_mal_i32, },
1656
+    };
1657
+    static const GVecGen4 g_vmah[3] = {
1658
+        { .fno = gen_helper_gvec_vmah8, },
1659
+        { .fno = gen_helper_gvec_vmah16, },
1660
+        { .fni4 = gen_mah_i32, },
1661
+    };
1662
+    static const GVecGen4 g_vmalh[3] = {
1663
+        { .fno = gen_helper_gvec_vmalh8, },
1664
+        { .fno = gen_helper_gvec_vmalh16, },
1665
+        { .fni4 = gen_malh_i32, },
1666
+    };
1667
+    static const GVecGen4 g_vmae[3] = {
1668
+        { .fno = gen_helper_gvec_vmae8, },
1669
+        { .fno = gen_helper_gvec_vmae16, },
1670
+        { .fno = gen_helper_gvec_vmae32, },
1671
+    };
1672
+    static const GVecGen4 g_vmale[3] = {
1673
+        { .fno = gen_helper_gvec_vmale8, },
1674
+        { .fno = gen_helper_gvec_vmale16, },
1675
+        { .fno = gen_helper_gvec_vmale32, },
1676
+    };
1677
+    static const GVecGen4 g_vmao[3] = {
1678
+        { .fno = gen_helper_gvec_vmao8, },
1679
+        { .fno = gen_helper_gvec_vmao16, },
1680
+        { .fno = gen_helper_gvec_vmao32, },
1681
+    };
1682
+    static const GVecGen4 g_vmalo[3] = {
1683
+        { .fno = gen_helper_gvec_vmalo8, },
1684
+        { .fno = gen_helper_gvec_vmalo16, },
1685
+        { .fno = gen_helper_gvec_vmalo32, },
1686
+    };
1687
+    const GVecGen4 *fn;
1688
+
1689
+    if (es > ES_32) {
1690
+        gen_program_exception(s, PGM_SPECIFICATION);
1691
+        return DISAS_NORETURN;
1692
+    }
1693
+
1694
+    switch (s->fields->op2) {
1695
+    case 0xaa:
1696
+        fn = &g_vmal[es];
1697
+        break;
1698
+    case 0xab:
1699
+        fn = &g_vmah[es];
1700
+        break;
1701
+    case 0xa9:
1702
+        fn = &g_vmalh[es];
1703
+        break;
1704
+    case 0xae:
1705
+        fn = &g_vmae[es];
1706
+        break;
1707
+    case 0xac:
1708
+        fn = &g_vmale[es];
1709
+        break;
1710
+    case 0xaf:
1711
+        fn = &g_vmao[es];
1712
+        break;
1713
+    case 0xad:
1714
+        fn = &g_vmalo[es];
1715
+        break;
1716
+    default:
1717
+        g_assert_not_reached();
1718
+    }
1719
+
1720
+    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
1721
+               get_field(s->fields, v3), get_field(s->fields, v4), fn);
1722
+    return DISAS_NEXT;
1723
+}
1724
+
1725
+static void gen_mh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1726
+{
1727
+    TCGv_i32 t = tcg_temp_new_i32();
1728
+
1729
+    tcg_gen_muls2_i32(t, d, a, b);
1730
+    tcg_temp_free_i32(t);
1731
+}
1732
+
1733
+static void gen_mlh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1734
+{
1735
+    TCGv_i32 t = tcg_temp_new_i32();
1736
+
1737
+    tcg_gen_mulu2_i32(t, d, a, b);
1738
+    tcg_temp_free_i32(t);
1739
+}
1740
+
1741
+static DisasJumpType op_vm(DisasContext *s, DisasOps *o)
1742
+{
1743
+    const uint8_t es = get_field(s->fields, m4);
1744
+    static const GVecGen3 g_vmh[3] = {
1745
+        { .fno = gen_helper_gvec_vmh8, },
1746
+        { .fno = gen_helper_gvec_vmh16, },
1747
+        { .fni4 = gen_mh_i32, },
1748
+    };
1749
+    static const GVecGen3 g_vmlh[3] = {
1750
+        { .fno = gen_helper_gvec_vmlh8, },
1751
+        { .fno = gen_helper_gvec_vmlh16, },
1752
+        { .fni4 = gen_mlh_i32, },
1753
+    };
1754
+    static const GVecGen3 g_vme[3] = {
1755
+        { .fno = gen_helper_gvec_vme8, },
1756
+        { .fno = gen_helper_gvec_vme16, },
1757
+        { .fno = gen_helper_gvec_vme32, },
1758
+    };
1759
+    static const GVecGen3 g_vmle[3] = {
1760
+        { .fno = gen_helper_gvec_vmle8, },
1761
+        { .fno = gen_helper_gvec_vmle16, },
1762
+        { .fno = gen_helper_gvec_vmle32, },
1763
+    };
1764
+    static const GVecGen3 g_vmo[3] = {
1765
+        { .fno = gen_helper_gvec_vmo8, },
1766
+        { .fno = gen_helper_gvec_vmo16, },
1767
+        { .fno = gen_helper_gvec_vmo32, },
1768
+    };
1769
+    static const GVecGen3 g_vmlo[3] = {
1770
+        { .fno = gen_helper_gvec_vmlo8, },
1771
+        { .fno = gen_helper_gvec_vmlo16, },
1772
+        { .fno = gen_helper_gvec_vmlo32, },
1773
+    };
1774
+    const GVecGen3 *fn;
1775
+
1776
+    if (es > ES_32) {
1777
+        gen_program_exception(s, PGM_SPECIFICATION);
1778
+        return DISAS_NORETURN;
1779
+    }
1780
+
1781
+    switch (s->fields->op2) {
1782
+    case 0xa2:
1783
+        gen_gvec_fn_3(mul, es, get_field(s->fields, v1),
1784
+                      get_field(s->fields, v2), get_field(s->fields, v3));
1785
+        return DISAS_NEXT;
1786
+    case 0xa3:
1787
+        fn = &g_vmh[es];
1788
+        break;
1789
+    case 0xa1:
1790
+        fn = &g_vmlh[es];
1791
+        break;
1792
+    case 0xa6:
1793
+        fn = &g_vme[es];
1794
+        break;
1795
+    case 0xa4:
1796
+        fn = &g_vmle[es];
1797
+        break;
1798
+    case 0xa7:
1799
+        fn = &g_vmo[es];
1800
+        break;
1801
+    case 0xa5:
1802
+        fn = &g_vmlo[es];
1803
+        break;
1804
+    default:
1805
+        g_assert_not_reached();
1806
+    }
1807
+
1808
+    gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
1809
+               get_field(s->fields, v3), fn);
1810
+    return DISAS_NEXT;
1811
+}
1812
+
1813
+static DisasJumpType op_vnn(DisasContext *s, DisasOps *o)
1814
+{
1815
+    gen_gvec_fn_3(nand, ES_8, get_field(s->fields, v1),
1816
+                  get_field(s->fields, v2), get_field(s->fields, v3));
1817
+    return DISAS_NEXT;
1818
+}
1819
+
1820
+static DisasJumpType op_vno(DisasContext *s, DisasOps *o)
1821
+{
1822
+    gen_gvec_fn_3(nor, ES_8, get_field(s->fields, v1), get_field(s->fields, v2),
1823
+                  get_field(s->fields, v3));
1824
+    return DISAS_NEXT;
1825
+}
1826
+
1827
+static DisasJumpType op_vnx(DisasContext *s, DisasOps *o)
1828
+{
1829
+    gen_gvec_fn_3(eqv, ES_8, get_field(s->fields, v1), get_field(s->fields, v2),
1830
+                  get_field(s->fields, v3));
1831
+    return DISAS_NEXT;
1832
+}
1833
+
1834
+static DisasJumpType op_vo(DisasContext *s, DisasOps *o)
1835
+{
1836
+    gen_gvec_fn_3(or, ES_8, get_field(s->fields, v1), get_field(s->fields, v2),
1837
+                  get_field(s->fields, v3));
1838
+    return DISAS_NEXT;
1839
+}
1840
+
1841
+static DisasJumpType op_voc(DisasContext *s, DisasOps *o)
1842
+{
1843
+    gen_gvec_fn_3(orc, ES_8, get_field(s->fields, v1), get_field(s->fields, v2),
1844
+                  get_field(s->fields, v3));
1845
+    return DISAS_NEXT;
1846
+}
1847
+
1848
+static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o)
1849
+{
1850
+    const uint8_t es = get_field(s->fields, m3);
1851
+    static const GVecGen2 g[4] = {
1852
+        { .fno = gen_helper_gvec_vpopct8, },
1853
+        { .fno = gen_helper_gvec_vpopct16, },
1854
+        { .fni4 = tcg_gen_ctpop_i32, },
1855
+        { .fni8 = tcg_gen_ctpop_i64, },
1856
+    };
1857
+
1858
+    if (es > ES_64 || (es != ES_8 && !s390_has_feat(S390_FEAT_VECTOR_ENH))) {
1859
+        gen_program_exception(s, PGM_SPECIFICATION);
1860
+        return DISAS_NORETURN;
1861
+    }
1862
+
1863
+    gen_gvec_2(get_field(s->fields, v1), get_field(s->fields, v2), &g[es]);
1864
+    return DISAS_NEXT;
1865
+}
1866
+
1867
+static void gen_rll_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1868
+{
1869
+    TCGv_i32 t0 = tcg_temp_new_i32();
1870
+
1871
+    tcg_gen_andi_i32(t0, b, 31);
1872
+    tcg_gen_rotl_i32(d, a, t0);
1873
+    tcg_temp_free_i32(t0);
1874
+}
1875
+
1876
+static void gen_rll_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1877
+{
1878
+    TCGv_i64 t0 = tcg_temp_new_i64();
1879
+
1880
+    tcg_gen_andi_i64(t0, b, 63);
1881
+    tcg_gen_rotl_i64(d, a, t0);
1882
+    tcg_temp_free_i64(t0);
1883
+}
1884
+
1885
+static DisasJumpType op_verllv(DisasContext *s, DisasOps *o)
1886
+{
1887
+    const uint8_t es = get_field(s->fields, m4);
1888
+    static const GVecGen3 g[4] = {
1889
+        { .fno = gen_helper_gvec_verllv8, },
1890
+        { .fno = gen_helper_gvec_verllv16, },
1891
+        { .fni4 = gen_rll_i32, },
1892
+        { .fni8 = gen_rll_i64, },
1893
+    };
1894
+
1895
+    if (es > ES_64) {
1896
+        gen_program_exception(s, PGM_SPECIFICATION);
1897
+        return DISAS_NORETURN;
1898
+    }
1899
+
1900
+    gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
1901
+               get_field(s->fields, v3), &g[es]);
1902
+    return DISAS_NEXT;
1903
+}
1904
+
1905
+static DisasJumpType op_verll(DisasContext *s, DisasOps *o)
1906
+{
1907
+    const uint8_t es = get_field(s->fields, m4);
1908
+    static const GVecGen2s g[4] = {
1909
+        { .fno = gen_helper_gvec_verll8, },
1910
+        { .fno = gen_helper_gvec_verll16, },
1911
+        { .fni4 = gen_rll_i32, },
1912
+        { .fni8 = gen_rll_i64, },
1913
+    };
1914
+
1915
+    if (es > ES_64) {
1916
+        gen_program_exception(s, PGM_SPECIFICATION);
1917
+        return DISAS_NORETURN;
1918
+    }
1919
+    gen_gvec_2s(get_field(s->fields, v1), get_field(s->fields, v3), o->addr1,
1920
+                &g[es]);
1921
+    return DISAS_NEXT;
1922
+}
1923
+
1924
+static void gen_rim_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c)
1925
+{
1926
+    TCGv_i32 t = tcg_temp_new_i32();
1927
+
1928
+    tcg_gen_rotli_i32(t, a, c & 31);
1929
+    tcg_gen_and_i32(t, t, b);
1930
+    tcg_gen_andc_i32(d, d, b);
1931
+    tcg_gen_or_i32(d, d, t);
1932
+
1933
+    tcg_temp_free_i32(t);
1934
+}
1935
+
1936
+static void gen_rim_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, int64_t c)
1937
+{
1938
+    TCGv_i64 t = tcg_temp_new_i64();
1939
+
1940
+    tcg_gen_rotli_i64(t, a, c & 63);
1941
+    tcg_gen_and_i64(t, t, b);
1942
+    tcg_gen_andc_i64(d, d, b);
1943
+    tcg_gen_or_i64(d, d, t);
1944
+
1945
+    tcg_temp_free_i64(t);
1946
+}
1947
+
1948
+static DisasJumpType op_verim(DisasContext *s, DisasOps *o)
1949
+{
1950
+    const uint8_t es = get_field(s->fields, m5);
1951
+    const uint8_t i4 = get_field(s->fields, i4) &
1952
+                       (NUM_VEC_ELEMENT_BITS(es) - 1);
1953
+    static const GVecGen3i g[4] = {
1954
+        { .fno = gen_helper_gvec_verim8, },
1955
+        { .fno = gen_helper_gvec_verim16, },
1956
+        { .fni4 = gen_rim_i32,
1957
+          .load_dest = true, },
1958
+        { .fni8 = gen_rim_i64,
1959
+          .load_dest = true, },
1960
+    };
1961
+
1962
+    if (es > ES_64) {
1963
+        gen_program_exception(s, PGM_SPECIFICATION);
1964
+        return DISAS_NORETURN;
1965
+    }
1966
+
1967
+    gen_gvec_3i(get_field(s->fields, v1), get_field(s->fields, v2),
1968
+                get_field(s->fields, v3), i4, &g[es]);
1969
+    return DISAS_NEXT;
1970
+}
1971
+
1972
+static DisasJumpType op_vesv(DisasContext *s, DisasOps *o)
1973
+{
1974
+    const uint8_t es = get_field(s->fields, m4);
1975
+    const uint8_t v1 = get_field(s->fields, v1);
1976
+    const uint8_t v2 = get_field(s->fields, v2);
1977
+    const uint8_t v3 = get_field(s->fields, v3);
1978
+
1979
+    if (es > ES_64) {
1980
+        gen_program_exception(s, PGM_SPECIFICATION);
1981
+        return DISAS_NORETURN;
1982
+    }
1983
+
1984
+    switch (s->fields->op2) {
1985
+    case 0x70:
1986
+        gen_gvec_fn_3(shlv, es, v1, v2, v3);
1987
+        break;
1988
+    case 0x7a:
1989
+        gen_gvec_fn_3(sarv, es, v1, v2, v3);
1990
+        break;
1991
+    case 0x78:
1992
+        gen_gvec_fn_3(shrv, es, v1, v2, v3);
1993
+        break;
1994
+    default:
1995
+        g_assert_not_reached();
1996
+    }
1997
+    return DISAS_NEXT;
1998
+}
1999
+
2000
/*
 * VECTOR ELEMENT SHIFT with a single shift count from d2(b2): all
 * elements are shifted by the same amount, reduced modulo the element
 * width in bits.  op2 selects left (0x30), arithmetic right (0x3a), or
 * logical right (0x38).
 */
static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    /* With no base register, the displacement alone is the (constant) count. */
    const uint8_t d2 = get_field(s->fields, d2) &
                       (NUM_VEC_ELEMENT_BITS(es) - 1);
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v3 = get_field(s->fields, v3);
    TCGv_i32 shift;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (likely(!get_field(s->fields, b2))) {
        /* Constant count: use the immediate-shift gvec expanders. */
        switch (s->fields->op2) {
        case 0x30:
            gen_gvec_fn_2i(shli, es, v1, v3, d2);
            break;
        case 0x3a:
            gen_gvec_fn_2i(sari, es, v1, v3, d2);
            break;
        case 0x38:
            gen_gvec_fn_2i(shri, es, v1, v3, d2);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        /*
         * Runtime count: o->addr1 holds the computed d2(b2) value; mask
         * it to the element width before expanding.
         */
        shift = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(shift, o->addr1);
        tcg_gen_andi_i32(shift, shift, NUM_VEC_ELEMENT_BITS(es) - 1);
        switch (s->fields->op2) {
        case 0x30:
            gen_gvec_fn_2s(shls, es, v1, v3, shift);
            break;
        case 0x3a:
            gen_gvec_fn_2s(sars, es, v1, v3, shift);
            break;
        case 0x38:
            gen_gvec_fn_2s(shrs, es, v1, v3, shift);
            break;
        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i32(shift);
    }
    return DISAS_NEXT;
}
2049
+
2050
/*
 * VECTOR SHIFT LEFT (whole 128-bit vector).  The count comes from byte
 * element 7 of v3: for op2 == 0x74 only the low 3 bits are used (bit
 * shift); otherwise the count is masked to a multiple of 8 bits (the
 * BY BYTE form).  The actual shift is done out of line.
 */
static DisasJumpType op_vsl(DisasContext *s, DisasOps *o)
{
    TCGv_i64 shift = tcg_temp_new_i64();

    read_vec_element_i64(shift, get_field(s->fields, v3), 7, ES_8);
    if (s->fields->op2 == 0x74) {
        /* shift by 0..7 bits */
        tcg_gen_andi_i64(shift, shift, 0x7);
    } else {
        /* shift by a multiple of 8 bits (0..120) */
        tcg_gen_andi_i64(shift, shift, 0x78);
    }

    gen_gvec_2i_ool(get_field(s->fields, v1), get_field(s->fields, v2),
                    shift, 0, gen_helper_gvec_vsl);
    tcg_temp_free_i64(shift);
    return DISAS_NEXT;
}
2066
+
2067
+static DisasJumpType op_vsldb(DisasContext *s, DisasOps *o)
2068
+{
2069
+    const uint8_t i4 = get_field(s->fields, i4) & 0xf;
2070
+    const int left_shift = (i4 & 7) * 8;
2071
+    const int right_shift = 64 - left_shift;
2072
+    TCGv_i64 t0 = tcg_temp_new_i64();
2073
+    TCGv_i64 t1 = tcg_temp_new_i64();
2074
+    TCGv_i64 t2 = tcg_temp_new_i64();
2075
+
2076
+    if ((i4 & 8) == 0) {
2077
+        read_vec_element_i64(t0, get_field(s->fields, v2), 0, ES_64);
2078
+        read_vec_element_i64(t1, get_field(s->fields, v2), 1, ES_64);
2079
+        read_vec_element_i64(t2, get_field(s->fields, v3), 0, ES_64);
2080
+    } else {
2081
+        read_vec_element_i64(t0, get_field(s->fields, v2), 1, ES_64);
2082
+        read_vec_element_i64(t1, get_field(s->fields, v3), 0, ES_64);
2083
+        read_vec_element_i64(t2, get_field(s->fields, v3), 1, ES_64);
2084
+    }
2085
+    tcg_gen_extract2_i64(t0, t1, t0, right_shift);
2086
+    tcg_gen_extract2_i64(t1, t2, t1, right_shift);
2087
+    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
2088
+    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
2089
+
2090
+    tcg_temp_free(t0);
2091
+    tcg_temp_free(t1);
2092
+    tcg_temp_free(t2);
2093
+    return DISAS_NEXT;
2094
+}
2095
+
2096
/*
 * VECTOR SHIFT RIGHT ARITHMETIC (whole vector).  Count is byte element
 * 7 of v3: op2 == 0x7e uses the low 3 bits (bit shift); otherwise the
 * count is masked to a multiple of 8 bits (the BY BYTE form).
 */
static DisasJumpType op_vsra(DisasContext *s, DisasOps *o)
{
    TCGv_i64 shift = tcg_temp_new_i64();

    read_vec_element_i64(shift, get_field(s->fields, v3), 7, ES_8);
    if (s->fields->op2 == 0x7e) {
        /* shift by 0..7 bits */
        tcg_gen_andi_i64(shift, shift, 0x7);
    } else {
        /* shift by a multiple of 8 bits (0..120) */
        tcg_gen_andi_i64(shift, shift, 0x78);
    }

    gen_gvec_2i_ool(get_field(s->fields, v1), get_field(s->fields, v2),
                    shift, 0, gen_helper_gvec_vsra);
    tcg_temp_free_i64(shift);
    return DISAS_NEXT;
}
2112
+
2113
/*
 * VECTOR SHIFT RIGHT LOGICAL (whole vector).  Count is byte element 7
 * of v3: op2 == 0x7c uses the low 3 bits (bit shift); otherwise the
 * count is masked to a multiple of 8 bits (the BY BYTE form).
 */
static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o)
{
    TCGv_i64 shift = tcg_temp_new_i64();

    read_vec_element_i64(shift, get_field(s->fields, v3), 7, ES_8);
    if (s->fields->op2 == 0x7c) {
        /* shift by 0..7 bits */
        tcg_gen_andi_i64(shift, shift, 0x7);
    } else {
        /* shift by a multiple of 8 bits (0..120) */
        tcg_gen_andi_i64(shift, shift, 0x78);
    }

    gen_gvec_2i_ool(get_field(s->fields, v1), get_field(s->fields, v2),
                    shift, 0, gen_helper_gvec_vsrl);
    tcg_temp_free_i64(shift);
    return DISAS_NEXT;
}
2129
+
2130
/*
 * VECTOR SUBTRACT: element-wise v1 = v2 - v3.  The 128-bit element
 * size is expanded manually via sub2 over the two doublewords; smaller
 * element sizes use the generic gvec "sub" expander.
 */
static DisasJumpType op_vs(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);

    if (es > ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    } else if (es == ES_128) {
        gen_gvec128_3_i64(tcg_gen_sub2_i64, get_field(s->fields, v1),
                          get_field(s->fields, v2), get_field(s->fields, v3));
        return DISAS_NEXT;
    }
    gen_gvec_fn_3(sub, es, get_field(s->fields, v1), get_field(s->fields, v2),
                  get_field(s->fields, v3));
    return DISAS_NEXT;
}
2146
+
2147
+static void gen_scbi_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
2148
+{
2149
+    tcg_gen_setcond_i32(TCG_COND_LTU, d, a, b);
2150
+}
2151
+
2152
+static void gen_scbi_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
2153
+{
2154
+    tcg_gen_setcond_i64(TCG_COND_LTU, d, a, b);
2155
+}
2156
+
2157
+static void gen_scbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
2158
+                          TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2159
+{
2160
+    TCGv_i64 th = tcg_temp_new_i64();
2161
+    TCGv_i64 tl = tcg_temp_new_i64();
2162
+    TCGv_i64 zero = tcg_const_i64(0);
2163
+
2164
+    tcg_gen_sub2_i64(tl, th, al, zero, bl, zero);
2165
+    tcg_gen_andi_i64(th, th, 1);
2166
+    tcg_gen_sub2_i64(tl, th, ah, zero, th, zero);
2167
+    tcg_gen_sub2_i64(tl, th, tl, th, bh, zero);
2168
+    tcg_gen_andi_i64(dl, th, 1);
2169
+    tcg_gen_mov_i64(dh, zero);
2170
+
2171
+    tcg_temp_free_i64(th);
2172
+    tcg_temp_free_i64(tl);
2173
+    tcg_temp_free_i64(zero);
2174
+}
2175
+
2176
/*
 * VECTOR SUBTRACT COMPUTE BORROW INDICATION: store the per-element
 * borrow indication of v2 - v3 in v1.  8/16-bit elements go through
 * out-of-line helpers, 32/64-bit use inline setcond callbacks, and the
 * 128-bit element size is expanded manually.
 */
static DisasJumpType op_vscbi(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    /* indexed by element size (ES_8..ES_64) */
    static const GVecGen3 g[4] = {
        { .fno = gen_helper_gvec_vscbi8, },
        { .fno = gen_helper_gvec_vscbi16, },
        { .fni4 = gen_scbi_i32, },
        { .fni8 = gen_scbi_i64, },
    };

    if (es > ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    } else if (es == ES_128) {
        gen_gvec128_3_i64(gen_scbi2_i64, get_field(s->fields, v1),
                          get_field(s->fields, v2), get_field(s->fields, v3));
        return DISAS_NEXT;
    }
    gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
               get_field(s->fields, v3), &g[es]);
    return DISAS_NEXT;
}
2198
+
2199
+static void gen_sbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
2200
+                         TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
2201
+{
2202
+    TCGv_i64 tl = tcg_temp_new_i64();
2203
+    TCGv_i64 zero = tcg_const_i64(0);
2204
+
2205
+    tcg_gen_andi_i64(tl, cl, 1);
2206
+    tcg_gen_sub2_i64(dl, dh, al, ah, bl, bh);
2207
+    tcg_gen_sub2_i64(dl, dh, dl, dh, tl, zero);
2208
+    tcg_temp_free_i64(tl);
2209
+    tcg_temp_free_i64(zero);
2210
+}
2211
+
2212
/*
 * VECTOR SUBTRACT WITH BORROW INDICATION: v1 = v2 - v3 with the
 * borrow indication taken from v4.  Only the quadword element size is
 * architecturally valid.
 */
static DisasJumpType op_vsbi(DisasContext *s, DisasOps *o)
{
    if (get_field(s->fields, m5) != ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec128_4_i64(gen_sbi2_i64, get_field(s->fields, v1),
                      get_field(s->fields, v2), get_field(s->fields, v3),
                      get_field(s->fields, v4));
    return DISAS_NEXT;
}
2224
+
2225
+static void gen_sbcbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
2226
+                           TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
2227
+{
2228
+    TCGv_i64 th = tcg_temp_new_i64();
2229
+    TCGv_i64 tl = tcg_temp_new_i64();
2230
+    TCGv_i64 zero = tcg_const_i64(0);
2231
+
2232
+    tcg_gen_andi_i64(tl, cl, 1);
2233
+    tcg_gen_sub2_i64(tl, th, al, zero, tl, zero);
2234
+    tcg_gen_sub2_i64(tl, th, tl, th, bl, zero);
2235
+    tcg_gen_andi_i64(th, th, 1);
2236
+    tcg_gen_sub2_i64(tl, th, ah, zero, th, zero);
2237
+    tcg_gen_sub2_i64(tl, th, tl, th, bh, zero);
2238
+    tcg_gen_andi_i64(dl, th, 1);
2239
+    tcg_gen_mov_i64(dh, zero);
2240
+
2241
+    tcg_temp_free_i64(tl);
2242
+    tcg_temp_free_i64(th);
2243
+    tcg_temp_free_i64(zero);
2244
+}
2245
+
2246
/*
 * VECTOR SUBTRACT WITH BORROW COMPUTE BORROW INDICATION: v1 receives
 * the borrow indication of v2 - v3 with borrow-in from v4.  Only the
 * quadword element size is architecturally valid.
 */
static DisasJumpType op_vsbcbi(DisasContext *s, DisasOps *o)
{
    if (get_field(s->fields, m5) != ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec128_4_i64(gen_sbcbi2_i64, get_field(s->fields, v1),
                      get_field(s->fields, v2), get_field(s->fields, v3),
                      get_field(s->fields, v4));
    return DISAS_NEXT;
}
2258
+
2259
/*
 * VECTOR SUM ACROSS DOUBLEWORD: each doubleword of v1 is the sum of
 * the v2 elements in the corresponding half plus the rightmost v3
 * element of that half.  Element size must be halfword or word.
 */
static DisasJumpType op_vsumg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_i64 sum, tmp;
    uint8_t dst_idx;

    if (es == ES_8 || es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    sum = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();
    /* one iteration per destination doubleword */
    for (dst_idx = 0; dst_idx < 2; dst_idx++) {
        uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 2;
        const uint8_t max_idx = idx + NUM_VEC_ELEMENTS(es) / 2 - 1;

        /* start from the rightmost v3 element of this half */
        read_vec_element_i64(sum, get_field(s->fields, v3), max_idx, es);
        for (; idx <= max_idx; idx++) {
            read_vec_element_i64(tmp, get_field(s->fields, v2), idx, es);
            tcg_gen_add_i64(sum, sum, tmp);
        }
        write_vec_element_i64(sum, get_field(s->fields, v1), dst_idx, ES_64);
    }
    tcg_temp_free_i64(sum);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
2287
+
2288
/*
 * VECTOR SUM ACROSS QUADWORD: v1 (as one 128-bit value) is the sum of
 * all v2 elements plus the rightmost v3 element, accumulated with
 * carry via add2.  Element size must be word or doubleword.
 */
static DisasJumpType op_vsumq(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    const uint8_t max_idx = NUM_VEC_ELEMENTS(es) - 1;
    TCGv_i64 sumh, suml, zero, tmpl;
    uint8_t idx;

    if (es < ES_32 || es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    sumh = tcg_const_i64(0);
    suml = tcg_temp_new_i64();
    zero = tcg_const_i64(0);
    tmpl = tcg_temp_new_i64();

    /* start from the rightmost v3 element */
    read_vec_element_i64(suml, get_field(s->fields, v3), max_idx, es);
    for (idx = 0; idx <= max_idx; idx++) {
        read_vec_element_i64(tmpl, get_field(s->fields, v2), idx, es);
        tcg_gen_add2_i64(suml, sumh, suml, sumh, tmpl, zero);
    }
    /* element 0 is the most significant doubleword */
    write_vec_element_i64(sumh, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(suml, get_field(s->fields, v1), 1, ES_64);

    tcg_temp_free_i64(sumh);
    tcg_temp_free_i64(suml);
    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(tmpl);
    return DISAS_NEXT;
}
2319
+
2320
/*
 * VECTOR SUM ACROSS WORD: each word of v1 is the sum of the v2
 * elements in the corresponding quarter plus the rightmost v3 element
 * of that quarter.  Element size must be byte or halfword.
 */
static DisasJumpType op_vsum(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_i32 sum, tmp;
    uint8_t dst_idx;

    if (es > ES_16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    sum = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();
    /* one iteration per destination word */
    for (dst_idx = 0; dst_idx < 4; dst_idx++) {
        uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 4;
        const uint8_t max_idx = idx + NUM_VEC_ELEMENTS(es) / 4 - 1;

        /* start from the rightmost v3 element of this quarter */
        read_vec_element_i32(sum, get_field(s->fields, v3), max_idx, es);
        for (; idx <= max_idx; idx++) {
            read_vec_element_i32(tmp, get_field(s->fields, v2), idx, es);
            tcg_gen_add_i32(sum, sum, tmp);
        }
        write_vec_element_i32(sum, get_field(s->fields, v1), dst_idx, ES_32);
    }
    tcg_temp_free_i32(sum);
    tcg_temp_free_i32(tmp);
    return DISAS_NEXT;
}
2348
+
2349
/*
 * VECTOR TEST UNDER MASK: the out-of-line helper computes the CC from
 * v1 (operand) and v2 (mask); the instruction only sets the CC.
 */
static DisasJumpType op_vtm(DisasContext *s, DisasOps *o)
{
    gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
                   cpu_env, 0, gen_helper_gvec_vtm);
    set_cc_static(s);
    return DISAS_NEXT;
}

+ 616
- 0
target/s390x/vec_int_helper.c View File

@@ -0,0 +1,616 @@
1
+/*
2
+ * QEMU TCG support -- s390x vector integer instruction support
3
+ *
4
+ * Copyright (C) 2019 Red Hat Inc
5
+ *
6
+ * Authors:
7
+ *   David Hildenbrand <david@redhat.com>
8
+ *
9
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
10
+ * See the COPYING file in the top-level directory.
11
+ */
12
+#include "qemu/osdep.h"
13
+#include "qemu-common.h"
14
+#include "cpu.h"
15
+#include "vec.h"
16
+#include "exec/helper-proto.h"
17
+#include "tcg/tcg-gvec-desc.h"
18
+
19
+static bool s390_vec_is_zero(const S390Vector *v)
20
+{
21
+    return !v->doubleword[0] && !v->doubleword[1];
22
+}
23
+
24
+static void s390_vec_xor(S390Vector *res, const S390Vector *a,
25
+                         const S390Vector *b)
26
+{
27
+    res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
28
+    res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
29
+}
30
+
31
+static void s390_vec_and(S390Vector *res, const S390Vector *a,
32
+                         const S390Vector *b)
33
+{
34
+    res->doubleword[0] = a->doubleword[0] & b->doubleword[0];
35
+    res->doubleword[1] = a->doubleword[1] & b->doubleword[1];
36
+}
37
+
38
+static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
39
+{
40
+    return a->doubleword[0] == b->doubleword[0] &&
41
+           a->doubleword[1] == b->doubleword[1];
42
+}
43
+
44
/*
 * Logical left shift of the 128-bit value in @a by @count (0..127)
 * bits into @d.  doubleword[0] is the most significant half.
 */
static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
{
    uint64_t tmp;

    g_assert(count < 128);
    if (count == 0) {
        d->doubleword[0] = a->doubleword[0];
        d->doubleword[1] = a->doubleword[1];
    } else if (count == 64) {
        /* exactly one doubleword: avoid shifts by 64 (UB in C) */
        d->doubleword[0] = a->doubleword[1];
        d->doubleword[1] = 0;
    } else if (count < 64) {
        /* bits shifted out of the low doubleword enter the high one */
        tmp = extract64(a->doubleword[1], 64 - count, count);
        d->doubleword[1] = a->doubleword[1] << count;
        d->doubleword[0] = (a->doubleword[0] << count) | tmp;
    } else {
        d->doubleword[0] = a->doubleword[1] << (count - 64);
        d->doubleword[1] = 0;
    }
}
64
+
65
+static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
66
+{
67
+    uint64_t tmp;
68
+
69
+    if (count == 0) {
70
+        d->doubleword[0] = a->doubleword[0];
71
+        d->doubleword[1] = a->doubleword[1];
72
+    } else if (count == 64) {
73
+        d->doubleword[1] = a->doubleword[0];
74
+        d->doubleword[0] = 0;
75
+    } else if (count < 64) {
76
+        tmp = a->doubleword[1] >> count;
77
+        d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
78
+        d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
79
+    } else {
80
+        d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
81
+        d->doubleword[0] = 0;
82
+    }
83
+}
84
+
85
/*
 * Logical right shift of the 128-bit value in @a by @count (0..127)
 * bits into @d, filling with zeroes.  doubleword[0] is the most
 * significant half.
 */
static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
{
    uint64_t tmp;

    g_assert(count < 128);
    if (count == 0) {
        d->doubleword[0] = a->doubleword[0];
        d->doubleword[1] = a->doubleword[1];
    } else if (count == 64) {
        /* exactly one doubleword: avoid shifts by 64 (UB in C) */
        d->doubleword[1] = a->doubleword[0];
        d->doubleword[0] = 0;
    } else if (count < 64) {
        /* low doubleword receives bits from the high one */
        tmp = a->doubleword[1] >> count;
        d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
        d->doubleword[0] = a->doubleword[0] >> count;
    } else {
        d->doubleword[1] = a->doubleword[0] >> (count - 64);
        d->doubleword[0] = 0;
    }
}
105
/*
 * VECTOR AVERAGE (signed): per element, (a + b + 1) >> 1.  Operands
 * are sign-extended into int32_t, so the sum cannot overflow for the
 * 8- and 16-bit variants; the "+ 1" implements rounding.
 */
#define DEF_VAVG(BITS)                                                         \
void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
                             uint32_t desc)                                    \
{                                                                              \
    int i;                                                                     \
                                                                               \
    for (i = 0; i < (128 / BITS); i++) {                                       \
        const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
        const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
                                                                               \
        s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
    }                                                                          \
}
DEF_VAVG(8)
DEF_VAVG(16)
120
+
121
/*
 * VECTOR AVERAGE LOGICAL (unsigned): per element, (a + b + 1) >> 1.
 * The operands are promoted to int before the addition; for 8/16-bit
 * elements the sum still fits, so no overflow can occur.
 */
#define DEF_VAVGL(BITS)                                                        \
void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3,        \
                              uint32_t desc)                                   \
{                                                                              \
    int i;                                                                     \
                                                                               \
    for (i = 0; i < (128 / BITS); i++) {                                       \
        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
                                                                               \
        s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
    }                                                                          \
}
DEF_VAVGL(8)
DEF_VAVGL(16)
136
+
137
/*
 * VECTOR COUNT LEADING ZEROS: clz32() counts over 32 bits, so subtract
 * the 32 - BITS extra leading zero bits introduced by zero-extension.
 */
#define DEF_VCLZ(BITS)                                                         \
void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc)          \
{                                                                              \
    int i;                                                                     \
                                                                               \
    for (i = 0; i < (128 / BITS); i++) {                                       \
        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
                                                                               \
        s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS);             \
    }                                                                          \
}
DEF_VCLZ(8)
DEF_VCLZ(16)
150
+
151
/*
 * VECTOR COUNT TRAILING ZEROS: a zero element has no one bit at all,
 * so its count is the full element width BITS.
 */
#define DEF_VCTZ(BITS)                                                         \
void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
{                                                                              \
    int i;                                                                     \
                                                                               \
    for (i = 0; i < (128 / BITS); i++) {                                       \
        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
                                                                               \
        s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS);              \
    }                                                                          \
}
DEF_VCTZ(8)
DEF_VCTZ(16)
164
+
165
/*
 * Carry-less ("Galois field") multiplication: binary long
 * multiplication with XOR instead of addition, so no carries propagate
 * between bit positions.  The product of two BITS-wide values fits in
 * the TBITS-wide (double width) result type.
 */
#define DEF_GALOIS_MULTIPLY(BITS, TBITS)                                       \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a,                \
                                             uint##TBITS##_t b)                \
{                                                                              \
    uint##TBITS##_t product = 0;                                               \
                                                                               \
    for (; b != 0; a <<= 1, b >>= 1) {                                         \
        if (b & 1) {                                                           \
            product ^= a;                                                      \
        }                                                                      \
    }                                                                          \
    return product;                                                            \
}
DEF_GALOIS_MULTIPLY(8, 16)
DEF_GALOIS_MULTIPLY(16, 32)
DEF_GALOIS_MULTIPLY(32, 64)
184
+
185
/*
 * 128-bit carry-less multiplication of two 64-bit values, using
 * S390Vector as a 128-bit accumulator (shift-and-XOR long
 * multiplication, same scheme as DEF_GALOIS_MULTIPLY above).
 */
static S390Vector galois_multiply64(uint64_t a, uint64_t b)
{
    S390Vector res = {};
    S390Vector va = {
        .doubleword[1] = a,
    };
    S390Vector vb = {
        .doubleword[1] = b,
    };

    while (!s390_vec_is_zero(&vb)) {
        if (vb.doubleword[1] & 0x1) {
            s390_vec_xor(&res, &res, &va);
        }
        s390_vec_shl(&va, &va, 1);
        s390_vec_shr(&vb, &vb, 1);
    }
    return res;
}
204
+
205
+#define DEF_VGFM(BITS, TBITS)                                                  \
206
+void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3,         \
207
+                             uint32_t desc)                                    \
208
+{                                                                              \
209
+    int i;                                                                     \
210
+                                                                               \
211
+    for (i = 0; i < (128 / TBITS); i++) {                                      \
212
+        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
213
+        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
214
+        uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
215
+                                                                               \
216
+        a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
217
+        b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
218
+        d = d ^ galois_multiply32(a, b);                                       \
219
+        s390_vec_write_element##TBITS(v1, i, d);                               \
220
+    }                                                                          \
221
+}
222
+DEF_VGFM(8, 16)
223
+DEF_VGFM(16, 32)
224
+DEF_VGFM(32, 64)
225
+
226
/*
 * VECTOR GALOIS FIELD MULTIPLY SUM (doubleword): XOR of the two
 * 128-bit carry-less products of the corresponding doublewords of v2
 * and v3.
 */
void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
                         uint32_t desc)
{
    S390Vector tmp1, tmp2;
    uint64_t a, b;

    a = s390_vec_read_element64(v2, 0);
    b = s390_vec_read_element64(v3, 0);
    tmp1 = galois_multiply64(a, b);
    a = s390_vec_read_element64(v2, 1);
    b = s390_vec_read_element64(v3, 1);
    tmp2 = galois_multiply64(a, b);
    s390_vec_xor(v1, &tmp1, &tmp2);
}
240
+
241
+#define DEF_VGFMA(BITS, TBITS)                                                 \
242
+void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3,        \
243
+                              const void *v4, uint32_t desc)                   \
244
+{                                                                              \
245
+    int i;                                                                     \
246
+                                                                               \
247
+    for (i = 0; i < (128 / TBITS); i++) {                                      \
248
+        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
249
+        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
250
+        uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
251
+                                                                               \
252
+        a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
253
+        b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
254
+        d = d ^ galois_multiply32(a, b);                                       \
255
+        d = d ^ s390_vec_read_element##TBITS(v4, i);                           \
256
+        s390_vec_write_element##TBITS(v1, i, d);                               \
257
+    }                                                                          \
258
+}
259
+DEF_VGFMA(8, 16)
260
+DEF_VGFMA(16, 32)
261
+DEF_VGFMA(32, 64)
262
+
263
/*
 * VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE (doubleword): XOR of
 * the two 128-bit carry-less doubleword products and the 128-bit
 * accumulator v4.
 */
void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
                          const void *v4, uint32_t desc)
{
    S390Vector tmp1, tmp2;
    uint64_t a, b;

    a = s390_vec_read_element64(v2, 0);
    b = s390_vec_read_element64(v3, 0);
    tmp1 = galois_multiply64(a, b);
    a = s390_vec_read_element64(v2, 1);
    b = s390_vec_read_element64(v3, 1);
    tmp2 = galois_multiply64(a, b);
    s390_vec_xor(&tmp1, &tmp1, &tmp2);
    s390_vec_xor(v1, &tmp1, v4);
}
278
+
279
+#define DEF_VMAL(BITS)                                                         \
280
+void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3,         \
281
+                             const void *v4, uint32_t desc)                    \
282
+{                                                                              \
283
+    int i;                                                                     \
284
+                                                                               \
285
+    for (i = 0; i < (128 / BITS); i++) {                                       \
286
+        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
287
+        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
288
+        const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
289
+                                                                               \
290
+        s390_vec_write_element##BITS(v1, i, a * b + c);                        \
291
+    }                                                                          \
292
+}
293
+DEF_VMAL(8)
294
+DEF_VMAL(16)
295
+
296
/*
 * VECTOR MULTIPLY AND ADD HIGH (signed): per element, the high BITS
 * bits of a * b + c.  Arithmetic is done in int32_t, which cannot
 * overflow for the 8- and 16-bit variants; the right shift of a
 * possibly negative value relies on arithmetic-shift behavior
 * (implementation-defined in ISO C, but guaranteed by the compilers
 * QEMU supports).
 */
#define DEF_VMAH(BITS)                                                         \
void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3,         \
                             const void *v4, uint32_t desc)                    \
{                                                                              \
    int i;                                                                     \
                                                                               \
    for (i = 0; i < (128 / BITS); i++) {                                       \
        const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
        const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
        const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i);   \
                                                                               \
        s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
    }                                                                          \
}
DEF_VMAH(8)
DEF_VMAH(16)
312
+
313
+#define DEF_VMALH(BITS)                                                        \
314
+void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3,        \
315
+                              const void *v4, uint32_t desc)                   \
316
+{                                                                              \
317
+    int i;                                                                     \
318
+                                                                               \
319
+    for (i = 0; i < (128 / BITS); i++) {                                       \
320
+        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
321
+        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
322
+        const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
323
+                                                                               \
324
+        s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
325
+    }                                                                          \
326
+}
327
+DEF_VMALH(8)
328
+DEF_VMALH(16)
329
+
330
/*
 * VECTOR MULTIPLY AND ADD EVEN (signed): the even-indexed elements
 * (j = 2 * i) of v2/v3/v4 are sign-extended to double width (TBITS)
 * and combined as a * b + c; the widening means the multiplication
 * cannot overflow.
 */
#define DEF_VMAE(BITS, TBITS)                                                  \
void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3,         \
                             const void *v4, uint32_t desc)                    \
{                                                                              \
    int i, j;                                                                  \
                                                                               \
    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
        int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j);  \
                                                                               \
        s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
    }                                                                          \
}
DEF_VMAE(8, 16)
DEF_VMAE(16, 32)
DEF_VMAE(32, 64)
347
+
348
/*
 * VECTOR MULTIPLY AND ADD LOGICAL EVEN: the even-indexed elements
 * (j = 2 * i) of v2/v3/v4 are zero-extended to double width (TBITS)
 * and combined as a * b + c; the widening means the multiplication
 * cannot overflow.
 */
#define DEF_VMALE(BITS, TBITS)                                                 \
void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3,        \
                              const void *v4, uint32_t desc)                   \
{                                                                              \
    int i, j;                                                                  \
                                                                               \
    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
        uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
        uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
        uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j);                \
                                                                               \
        s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
    }                                                                          \
}
DEF_VMALE(8, 16)
DEF_VMALE(16, 32)
DEF_VMALE(32, 64)
365
+
366
/*
 * VECTOR MULTIPLY AND ADD ODD (signed): like VMAE, but over the
 * odd-indexed elements (j = 2 * i + 1), sign-extended to double width
 * (TBITS) so the multiplication cannot overflow.
 */
#define DEF_VMAO(BITS, TBITS)                                                  \
void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3,         \
                             const void *v4, uint32_t desc)                    \
{                                                                              \
    int i, j;                                                                  \
                                                                               \
    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
        int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j);  \
                                                                               \
        s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
    }                                                                          \
}
DEF_VMAO(8, 16)
DEF_VMAO(16, 32)
DEF_VMAO(32, 64)
383
+
384
+#define DEF_VMALO(BITS, TBITS)                                                 \
385
+void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3,        \
386
+                              const void *v4, uint32_t desc)                   \
387
+{                                                                              \
388
+    int i, j;                                                                  \
389
+                                                                               \
390
+    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
391
+        uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
392
+        uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
393
+        uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j);                \
394
+                                                                               \
395
+        s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
396
+    }                                                                          \
397
+}
398
+DEF_VMALO(8, 16)
399
+DEF_VMALO(16, 32)
400
+DEF_VMALO(32, 64)
401
+
402
+#define DEF_VMH(BITS)                                                          \
403
+void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3,          \
404
+                            uint32_t desc)                                     \
405
+{                                                                              \
406
+    int i;                                                                     \
407
+                                                                               \
408
+    for (i = 0; i < (128 / BITS); i++) {                                       \
409
+        const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
410
+        const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
411
+                                                                               \
412
+        s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
413
+    }                                                                          \
414
+}
415
+DEF_VMH(8)
416
+DEF_VMH(16)
417
+
418
+#define DEF_VMLH(BITS)                                                         \
419
+void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3,         \
420
+                             uint32_t desc)                                    \
421
+{                                                                              \
422
+    int i;                                                                     \
423
+                                                                               \
424
+    for (i = 0; i < (128 / BITS); i++) {                                       \
425
+        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
426
+        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
427
+                                                                               \
428
+        s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
429
+    }                                                                          \
430
+}
431
+DEF_VMLH(8)
432
+DEF_VMLH(16)
433
+
434
+#define DEF_VME(BITS, TBITS)                                                   \
435
+void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3,          \
436
+                            uint32_t desc)                                     \
437
+{                                                                              \
438
+    int i, j;                                                                  \
439
+                                                                               \
440
+    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
441
+        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
442
+        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
443
+                                                                               \
444
+        s390_vec_write_element##TBITS(v1, i, a * b);                           \
445
+    }                                                                          \
446
+}
447
+DEF_VME(8, 16)
448
+DEF_VME(16, 32)
449
+DEF_VME(32, 64)
450
+
451
+#define DEF_VMLE(BITS, TBITS)                                                  \
452
+void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3,         \
453
+                             uint32_t desc)                                    \
454
+{                                                                              \
455
+    int i, j;                                                                  \
456
+                                                                               \
457
+    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
458
+        const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
459
+        const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
460
+                                                                               \
461
+        s390_vec_write_element##TBITS(v1, i, a * b);                           \
462
+    }                                                                          \
463
+}
464
+DEF_VMLE(8, 16)
465
+DEF_VMLE(16, 32)
466
+DEF_VMLE(32, 64)
467
+
468
+#define DEF_VMO(BITS, TBITS)                                                   \
469
+void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3,          \
470
+                            uint32_t desc)                                     \
471
+{                                                                              \
472
+    int i, j;                                                                  \
473
+                                                                               \
474
+    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
475
+        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
476
+        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
477
+                                                                               \
478
+        s390_vec_write_element##TBITS(v1, i, a * b);                           \
479
+    }                                                                          \
480
+}
481
+DEF_VMO(8, 16)
482
+DEF_VMO(16, 32)
483
+DEF_VMO(32, 64)
484
+
485
+#define DEF_VMLO(BITS, TBITS)                                                  \
486
+void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3,         \
487
+                             uint32_t desc)                                    \
488
+{                                                                              \
489
+    int i, j;                                                                  \
490
+                                                                               \
491
+    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
492
+        const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
493
+        const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
494
+                                                                               \
495
+        s390_vec_write_element##TBITS(v1, i, a * b);                           \
496
+    }                                                                          \
497
+}
498
+DEF_VMLO(8, 16)
499
+DEF_VMLO(16, 32)
500
+DEF_VMLO(32, 64)
501
+
502
+#define DEF_VPOPCT(BITS)                                                       \
503
+void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc)        \
504
+{                                                                              \
505
+    int i;                                                                     \
506
+                                                                               \
507
+    for (i = 0; i < (128 / BITS); i++) {                                       \
508
+        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
509
+                                                                               \
510
+        s390_vec_write_element##BITS(v1, i, ctpop32(a));                       \
511
+    }                                                                          \
512
+}
513
+DEF_VPOPCT(8)
514
+DEF_VPOPCT(16)
515
+
516
+#define DEF_VERLLV(BITS)                                                       \
517
+void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3,       \
518
+                               uint32_t desc)                                  \
519
+{                                                                              \
520
+    int i;                                                                     \
521
+                                                                               \
522
+    for (i = 0; i < (128 / BITS); i++) {                                       \
523
+        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
524
+        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
525
+                                                                               \
526
+        s390_vec_write_element##BITS(v1, i, rol##BITS(a, b));                  \
527
+    }                                                                          \
528
+}
529
+DEF_VERLLV(8)
530
+DEF_VERLLV(16)
531
+
532
+#define DEF_VERLL(BITS)                                                        \
533
+void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count,        \
534
+                              uint32_t desc)                                   \
535
+{                                                                              \
536
+    int i;                                                                     \
537
+                                                                               \
538
+    for (i = 0; i < (128 / BITS); i++) {                                       \
539
+        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
540
+                                                                               \
541
+        s390_vec_write_element##BITS(v1, i, rol##BITS(a, count));              \
542
+    }                                                                          \
543
+}
544
+DEF_VERLL(8)
545
+DEF_VERLL(16)
546
+
547
+#define DEF_VERIM(BITS)                                                        \
548
+void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3,        \
549
+                              uint32_t desc)                                   \
550
+{                                                                              \
551
+    const uint8_t count = simd_data(desc);                                     \
552
+    int i;                                                                     \
553
+                                                                               \
554
+    for (i = 0; i < (128 / BITS); i++) {                                       \
555
+        const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i);           \
556
+        const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i);           \
557
+        const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i);        \
558
+        const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask);   \
559
+                                                                               \
560
+        s390_vec_write_element##BITS(v1, i, d);                                \
561
+    }                                                                          \
562
+}
563
+DEF_VERIM(8)
564
+DEF_VERIM(16)
565
+
566
+void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
567
+                      uint32_t desc)
568
+{
569
+    s390_vec_shl(v1, v2, count);
570
+}
571
+
572
+void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
573
+                       uint32_t desc)
574
+{
575
+    s390_vec_sar(v1, v2, count);
576
+}
577
+
578
+void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
579
+                       uint32_t desc)
580
+{
581
+    s390_vec_shr(v1, v2, count);
582
+}
583
+
584
+#define DEF_VSCBI(BITS)                                                        \
585
+void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        \
586
+                              uint32_t desc)                                   \
587
+{                                                                              \
588
+    int i;                                                                     \
589
+                                                                               \
590
+    for (i = 0; i < (128 / BITS); i++) {                                       \
591
+        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
592
+        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
593
+                                                                               \
594
+        s390_vec_write_element##BITS(v1, i, a < b);                            \
595
+    }                                                                          \
596
+}
597
+DEF_VSCBI(8)
598
+DEF_VSCBI(16)
599
+
600
+void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
601
+                      uint32_t desc)
602
+{
603
+    S390Vector tmp;
604
+
605
+    s390_vec_and(&tmp, v1, v2);
606
+    if (s390_vec_is_zero(&tmp)) {
607
+        /* Selected bits all zeros; or all mask bits zero */
608
+        env->cc_op = 0;
609
+    } else if (s390_vec_equal(&tmp, v2)) {
610
+        /* Selected bits all ones */
611
+        env->cc_op = 3;
612
+    } else {
613
+        /* Selected bits a mix of zeros and ones */
614
+        env->cc_op = 1;
615
+    }
616
+}

Loading…
Cancel
Save