• R/O
  • HTTP
  • SSH
  • HTTPS

提交

標籤
無標籤

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

修訂adb196cbd5cff26547bc32a208074f03f4c4a627 (tree)
時間2018-03-16 01:55:04
作者Richard Henderson <richard.henderson@lina...>
CommitterRichard Henderson

Log Message

tcg: Add choose_vector_size

This unifies 5 copies of checks for supported vector size,
and in the process fixes a missing check in tcg_gen_gvec_2s.

This led to an assertion failure for 64-bit vector multiply,
which is not available in the AVX instruction set.

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Change Summary

差異

--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -351,6 +351,42 @@ static void gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in)
351351 }
352352 }
353353
354+/* Select a supported vector type for implementing an operation on SIZE
355+ * bytes. If OP is 0, assume that the real operation to be performed is
356+ * required by all backends. Otherwise, make sure that OP can be performed
357+ * on elements of size VECE in the selected type. Do not select V64 if
358+ * PREFER_I64 is true. Return 0 if no vector type is selected.
359+ */
360+static TCGType choose_vector_type(TCGOpcode op, unsigned vece, uint32_t size,
361+ bool prefer_i64)
362+{
363+ if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) {
364+ if (op == 0) {
365+ return TCG_TYPE_V256;
366+ }
367+ /* Recall that ARM SVE allows vector sizes that are not a
368+ * power of 2, but always a multiple of 16. The intent is
369+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
370+ * It is hard to imagine a case in which v256 is supported
371+ * but v128 is not, but check anyway.
372+ */
373+ if (tcg_can_emit_vec_op(op, TCG_TYPE_V256, vece)
374+ && (size % 32 == 0
375+ || tcg_can_emit_vec_op(op, TCG_TYPE_V128, vece))) {
376+ return TCG_TYPE_V256;
377+ }
378+ }
379+ if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16)
380+ && (op == 0 || tcg_can_emit_vec_op(op, TCG_TYPE_V128, vece))) {
381+ return TCG_TYPE_V128;
382+ }
383+ if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8)
384+ && (op == 0 || tcg_can_emit_vec_op(op, TCG_TYPE_V64, vece))) {
385+ return TCG_TYPE_V64;
386+ }
387+ return 0;
388+}
389+
354390 /* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C.
355391 * Only one of IN_32 or IN_64 may be set;
356392 * IN_C is used if IN_32 and IN_64 are unset.
@@ -376,19 +412,12 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
376412 }
377413 }
378414
379- type = 0;
380- if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)) {
381- type = TCG_TYPE_V256;
382- } else if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)) {
383- type = TCG_TYPE_V128;
384- } else if (TCG_TARGET_HAS_v64 && check_size_impl(oprsz, 8)
385- /* Prefer integer when 64-bit host and no variable dup. */
386- && !(TCG_TARGET_REG_BITS == 64 && in_32 == NULL
387- && (in_64 == NULL || vece == MO_64))) {
388- type = TCG_TYPE_V64;
389- }
390-
391- /* Implement inline with a vector type, if possible. */
415+ /* Implement inline with a vector type, if possible.
416+ * Prefer integer when 64-bit host and no variable dup.
417+ */
418+ type = choose_vector_type(0, vece, oprsz,
419+ (TCG_TARGET_REG_BITS == 64 && in_32 == NULL
420+ && (in_64 == NULL || vece == MO_64)));
392421 if (type != 0) {
393422 TCGv_vec t_vec = tcg_temp_new_vec(type);
394423
@@ -414,21 +443,30 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
414443 }
415444
416445 i = 0;
417- if (TCG_TARGET_HAS_v256) {
446+ switch (type) {
447+ case TCG_TYPE_V256:
448+ /* Recall that ARM SVE allows vector sizes that are not a
449+ * power of 2, but always a multiple of 16. The intent is
450+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
451+ */
418452 for (; i + 32 <= oprsz; i += 32) {
419453 tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256);
420454 }
421- }
422- if (TCG_TARGET_HAS_v128) {
455+ /* fallthru */
456+ case TCG_TYPE_V128:
423457 for (; i + 16 <= oprsz; i += 16) {
424458 tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128);
425459 }
426- }
427- if (TCG_TARGET_HAS_v64) {
460+ break;
461+ case TCG_TYPE_V64:
428462 for (; i < oprsz; i += 8) {
429463 tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
430464 }
465+ break;
466+ default:
467+ g_assert_not_reached();
431468 }
469+
432470 tcg_temp_free_vec(t_vec);
433471 goto done;
434472 }
@@ -484,7 +522,7 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
484522 }
485523 tcg_temp_free_i64(t_64);
486524 goto done;
487- }
525+ }
488526 }
489527
490528 /* Otherwise implement out of line. */
@@ -866,49 +904,55 @@ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
866904 void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
867905 uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g)
868906 {
907+ TCGType type;
908+ uint32_t some;
909+
869910 check_size_align(oprsz, maxsz, dofs | aofs);
870911 check_overlap_2(dofs, aofs, maxsz);
871912
872- /* Recall that ARM SVE allows vector sizes that are not a power of 2.
873- Expand with successively smaller host vector sizes. The intent is
874- that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */
875- /* ??? For maxsz > oprsz, the host may be able to use an opr-sized
876- operation, zeroing the balance of the register. We can then
877- use a max-sized store to implement the clearing without an extra
878- store operation. This is true for aarch64 and x86_64 hosts. */
879-
880- if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32)
881- && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) {
882- uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
913+ type = 0;
914+ if (g->fniv) {
915+ type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
916+ }
917+ switch (type) {
918+ case TCG_TYPE_V256:
919+ /* Recall that ARM SVE allows vector sizes that are not a
920+ * power of 2, but always a multiple of 16. The intent is
921+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
922+ */
923+ some = QEMU_ALIGN_DOWN(oprsz, 32);
883924 expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv);
884925 if (some == oprsz) {
885- goto done;
926+ break;
886927 }
887928 dofs += some;
888929 aofs += some;
889930 oprsz -= some;
890931 maxsz -= some;
891- }
892-
893- if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16)
894- && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) {
932+ /* fallthru */
933+ case TCG_TYPE_V128:
895934 expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv);
896- } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
897- && g->fniv && check_size_impl(oprsz, 8)
898- && (!g->opc
899- || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) {
935+ break;
936+ case TCG_TYPE_V64:
900937 expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv);
901- } else if (g->fni8 && check_size_impl(oprsz, 8)) {
902- expand_2_i64(dofs, aofs, oprsz, g->fni8);
903- } else if (g->fni4 && check_size_impl(oprsz, 4)) {
904- expand_2_i32(dofs, aofs, oprsz, g->fni4);
905- } else {
906- assert(g->fno != NULL);
907- tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno);
908- return;
938+ break;
939+
940+ case 0:
941+ if (g->fni8 && check_size_impl(oprsz, 8)) {
942+ expand_2_i64(dofs, aofs, oprsz, g->fni8);
943+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
944+ expand_2_i32(dofs, aofs, oprsz, g->fni4);
945+ } else {
946+ assert(g->fno != NULL);
947+ tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno);
948+ return;
949+ }
950+ break;
951+
952+ default:
953+ g_assert_not_reached();
909954 }
910955
911- done:
912956 if (oprsz < maxsz) {
913957 expand_clr(dofs + oprsz, maxsz - oprsz);
914958 }
@@ -918,53 +962,64 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
918962 void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
919963 uint32_t maxsz, int64_t c, const GVecGen2i *g)
920964 {
965+ TCGType type;
966+ uint32_t some;
967+
921968 check_size_align(oprsz, maxsz, dofs | aofs);
922969 check_overlap_2(dofs, aofs, maxsz);
923970
924- /* Recall that ARM SVE allows vector sizes that are not a power of 2.
925- Expand with successively smaller host vector sizes. The intent is
926- that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */
927-
928- if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32)
929- && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) {
930- uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
971+ type = 0;
972+ if (g->fniv) {
973+ type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
974+ }
975+ switch (type) {
976+ case TCG_TYPE_V256:
977+ /* Recall that ARM SVE allows vector sizes that are not a
978+ * power of 2, but always a multiple of 16. The intent is
979+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
980+ */
981+ some = QEMU_ALIGN_DOWN(oprsz, 32);
931982 expand_2i_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
932983 c, g->load_dest, g->fniv);
933984 if (some == oprsz) {
934- goto done;
985+ break;
935986 }
936987 dofs += some;
937988 aofs += some;
938989 oprsz -= some;
939990 maxsz -= some;
940- }
941-
942- if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16)
943- && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) {
991+ /* fallthru */
992+ case TCG_TYPE_V128:
944993 expand_2i_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
945994 c, g->load_dest, g->fniv);
946- } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
947- && g->fniv && check_size_impl(oprsz, 8)
948- && (!g->opc
949- || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) {
995+ break;
996+ case TCG_TYPE_V64:
950997 expand_2i_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
951998 c, g->load_dest, g->fniv);
952- } else if (g->fni8 && check_size_impl(oprsz, 8)) {
953- expand_2i_i64(dofs, aofs, oprsz, c, g->load_dest, g->fni8);
954- } else if (g->fni4 && check_size_impl(oprsz, 4)) {
955- expand_2i_i32(dofs, aofs, oprsz, c, g->load_dest, g->fni4);
956- } else {
957- if (g->fno) {
958- tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, c, g->fno);
999+ break;
1000+
1001+ case 0:
1002+ if (g->fni8 && check_size_impl(oprsz, 8)) {
1003+ expand_2i_i64(dofs, aofs, oprsz, c, g->load_dest, g->fni8);
1004+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
1005+ expand_2i_i32(dofs, aofs, oprsz, c, g->load_dest, g->fni4);
9591006 } else {
960- TCGv_i64 tcg_c = tcg_const_i64(c);
961- tcg_gen_gvec_2i_ool(dofs, aofs, tcg_c, oprsz, maxsz, c, g->fnoi);
962- tcg_temp_free_i64(tcg_c);
1007+ if (g->fno) {
1008+ tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, c, g->fno);
1009+ } else {
1010+ TCGv_i64 tcg_c = tcg_const_i64(c);
1011+ tcg_gen_gvec_2i_ool(dofs, aofs, tcg_c, oprsz,
1012+ maxsz, c, g->fnoi);
1013+ tcg_temp_free_i64(tcg_c);
1014+ }
1015+ return;
9631016 }
964- return;
1017+ break;
1018+
1019+ default:
1020+ g_assert_not_reached();
9651021 }
9661022
967- done:
9681023 if (oprsz < maxsz) {
9691024 expand_clr(dofs + oprsz, maxsz - oprsz);
9701025 }
@@ -981,37 +1036,30 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
9811036
9821037 type = 0;
9831038 if (g->fniv) {
984- if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)) {
985- type = TCG_TYPE_V256;
986- } else if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)) {
987- type = TCG_TYPE_V128;
988- } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
989- && check_size_impl(oprsz, 8)) {
990- type = TCG_TYPE_V64;
991- }
1039+ type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
9921040 }
9931041 if (type != 0) {
9941042 TCGv_vec t_vec = tcg_temp_new_vec(type);
1043+ uint32_t some;
9951044
9961045 tcg_gen_dup_i64_vec(g->vece, t_vec, c);
9971046
998- /* Recall that ARM SVE allows vector sizes that are not a power of 2.
999- Expand with successively smaller host vector sizes. The intent is
1000- that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */
10011047 switch (type) {
10021048 case TCG_TYPE_V256:
1003- {
1004- uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
1005- expand_2s_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
1006- t_vec, g->scalar_first, g->fniv);
1007- if (some == oprsz) {
1008- break;
1009- }
1010- dofs += some;
1011- aofs += some;
1012- oprsz -= some;
1013- maxsz -= some;
1049+ /* Recall that ARM SVE allows vector sizes that are not a
1050+ * power of 2, but always a multiple of 16. The intent is
1051+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1052+ */
1053+ some = QEMU_ALIGN_DOWN(oprsz, 32);
1054+ expand_2s_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
1055+ t_vec, g->scalar_first, g->fniv);
1056+ if (some == oprsz) {
1057+ break;
10141058 }
1059+ dofs += some;
1060+ aofs += some;
1061+ oprsz -= some;
1062+ maxsz -= some;
10151063 /* fallthru */
10161064
10171065 case TCG_TYPE_V128:
@@ -1055,48 +1103,60 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
10551103 void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
10561104 uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)
10571105 {
1106+ TCGType type;
1107+ uint32_t some;
1108+
10581109 check_size_align(oprsz, maxsz, dofs | aofs | bofs);
10591110 check_overlap_3(dofs, aofs, bofs, maxsz);
10601111
1061- /* Recall that ARM SVE allows vector sizes that are not a power of 2.
1062- Expand with successively smaller host vector sizes. The intent is
1063- that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */
1064-
1065- if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32)
1066- && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) {
1067- uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
1112+ type = 0;
1113+ if (g->fniv) {
1114+ type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
1115+ }
1116+ switch (type) {
1117+ case TCG_TYPE_V256:
1118+ /* Recall that ARM SVE allows vector sizes that are not a
1119+ * power of 2, but always a multiple of 16. The intent is
1120+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1121+ */
1122+ some = QEMU_ALIGN_DOWN(oprsz, 32);
10681123 expand_3_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256,
10691124 g->load_dest, g->fniv);
10701125 if (some == oprsz) {
1071- goto done;
1126+ break;
10721127 }
10731128 dofs += some;
10741129 aofs += some;
10751130 bofs += some;
10761131 oprsz -= some;
10771132 maxsz -= some;
1078- }
1079-
1080- if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16)
1081- && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) {
1133+ /* fallthru */
1134+ case TCG_TYPE_V128:
10821135 expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128,
10831136 g->load_dest, g->fniv);
1084- } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
1085- && g->fniv && check_size_impl(oprsz, 8)
1086- && (!g->opc
1087- || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) {
1137+ break;
1138+ case TCG_TYPE_V64:
10881139 expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64,
10891140 g->load_dest, g->fniv);
1090- } else if (g->fni8 && check_size_impl(oprsz, 8)) {
1091- expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8);
1092- } else if (g->fni4 && check_size_impl(oprsz, 4)) {
1093- expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4);
1094- } else {
1095- assert(g->fno != NULL);
1096- tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, g->data, g->fno);
1141+ break;
1142+
1143+ case 0:
1144+ if (g->fni8 && check_size_impl(oprsz, 8)) {
1145+ expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8);
1146+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
1147+ expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4);
1148+ } else {
1149+ assert(g->fno != NULL);
1150+ tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz,
1151+ maxsz, g->data, g->fno);
1152+ return;
1153+ }
1154+ break;
1155+
1156+ default:
1157+ g_assert_not_reached();
10971158 }
10981159
1099- done:
11001160 if (oprsz < maxsz) {
11011161 expand_clr(dofs + oprsz, maxsz - oprsz);
11021162 }
@@ -1106,20 +1166,27 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
11061166 void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
11071167 uint32_t oprsz, uint32_t maxsz, const GVecGen4 *g)
11081168 {
1169+ TCGType type;
1170+ uint32_t some;
1171+
11091172 check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs);
11101173 check_overlap_4(dofs, aofs, bofs, cofs, maxsz);
11111174
1112- /* Recall that ARM SVE allows vector sizes that are not a power of 2.
1113- Expand with successively smaller host vector sizes. The intent is
1114- that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */
1115-
1116- if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32)
1117- && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) {
1118- uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
1175+ type = 0;
1176+ if (g->fniv) {
1177+ type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
1178+ }
1179+ switch (type) {
1180+ case TCG_TYPE_V256:
1181+ /* Recall that ARM SVE allows vector sizes that are not a
1182+ * power of 2, but always a multiple of 16. The intent is
1183+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1184+ */
1185+ some = QEMU_ALIGN_DOWN(oprsz, 32);
11191186 expand_4_vec(g->vece, dofs, aofs, bofs, cofs, some,
11201187 32, TCG_TYPE_V256, g->fniv);
11211188 if (some == oprsz) {
1122- goto done;
1189+ break;
11231190 }
11241191 dofs += some;
11251192 aofs += some;
@@ -1127,30 +1194,33 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
11271194 cofs += some;
11281195 oprsz -= some;
11291196 maxsz -= some;
1130- }
1131-
1132- if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16)
1133- && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) {
1197+ /* fallthru */
1198+ case TCG_TYPE_V128:
11341199 expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
11351200 16, TCG_TYPE_V128, g->fniv);
1136- } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
1137- && g->fniv && check_size_impl(oprsz, 8)
1138- && (!g->opc
1139- || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) {
1201+ break;
1202+ case TCG_TYPE_V64:
11401203 expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
11411204 8, TCG_TYPE_V64, g->fniv);
1142- } else if (g->fni8 && check_size_impl(oprsz, 8)) {
1143- expand_4_i64(dofs, aofs, bofs, cofs, oprsz, g->fni8);
1144- } else if (g->fni4 && check_size_impl(oprsz, 4)) {
1145- expand_4_i32(dofs, aofs, bofs, cofs, oprsz, g->fni4);
1146- } else {
1147- assert(g->fno != NULL);
1148- tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs,
1149- oprsz, maxsz, g->data, g->fno);
1150- return;
1205+ break;
1206+
1207+ case 0:
1208+ if (g->fni8 && check_size_impl(oprsz, 8)) {
1209+ expand_4_i64(dofs, aofs, bofs, cofs, oprsz, g->fni8);
1210+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
1211+ expand_4_i32(dofs, aofs, bofs, cofs, oprsz, g->fni4);
1212+ } else {
1213+ assert(g->fno != NULL);
1214+ tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs,
1215+ oprsz, maxsz, g->data, g->fno);
1216+ return;
1217+ }
1218+ break;
1219+
1220+ default:
1221+ g_assert_not_reached();
11511222 }
11521223
1153- done:
11541224 if (oprsz < maxsz) {
11551225 expand_clr(dofs + oprsz, maxsz - oprsz);
11561226 }
@@ -2155,6 +2225,8 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
21552225 [TCG_COND_LTU] = ltu_fn,
21562226 [TCG_COND_LEU] = leu_fn,
21572227 };
2228+ TCGType type;
2229+ uint32_t some;
21582230
21592231 check_size_align(oprsz, maxsz, dofs | aofs | bofs);
21602232 check_overlap_3(dofs, aofs, bofs, maxsz);
@@ -2165,51 +2237,59 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
21652237 return;
21662238 }
21672239
2168- /* Recall that ARM SVE allows vector sizes that are not a power of 2.
2169- Expand with successively smaller host vector sizes. The intent is
2170- that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */
2171-
2172- if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)
2173- && tcg_can_emit_vec_op(INDEX_op_cmp_vec, TCG_TYPE_V256, vece)) {
2174- uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
2240+ /* Implement inline with a vector type, if possible.
2241+ * Prefer integer when 64-bit host and 64-bit comparison.
2242+ */
2243+ type = choose_vector_type(INDEX_op_cmp_vec, vece, oprsz,
2244+ TCG_TARGET_REG_BITS == 64 && vece == MO_64);
2245+ switch (type) {
2246+ case TCG_TYPE_V256:
2247+ /* Recall that ARM SVE allows vector sizes that are not a
2248+ * power of 2, but always a multiple of 16. The intent is
2249+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
2250+ */
2251+ some = QEMU_ALIGN_DOWN(oprsz, 32);
21752252 expand_cmp_vec(vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, cond);
21762253 if (some == oprsz) {
2177- goto done;
2254+ break;
21782255 }
21792256 dofs += some;
21802257 aofs += some;
21812258 bofs += some;
21822259 oprsz -= some;
21832260 maxsz -= some;
2184- }
2185-
2186- if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)
2187- && tcg_can_emit_vec_op(INDEX_op_cmp_vec, TCG_TYPE_V128, vece)) {
2261+ /* fallthru */
2262+ case TCG_TYPE_V128:
21882263 expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, cond);
2189- } else if (TCG_TARGET_HAS_v64
2190- && check_size_impl(oprsz, 8)
2191- && (TCG_TARGET_REG_BITS == 32 || vece != MO_64)
2192- && tcg_can_emit_vec_op(INDEX_op_cmp_vec, TCG_TYPE_V64, vece)) {
2264+ break;
2265+ case TCG_TYPE_V64:
21932266 expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, cond);
2194- } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
2195- expand_cmp_i64(dofs, aofs, bofs, oprsz, cond);
2196- } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
2197- expand_cmp_i32(dofs, aofs, bofs, oprsz, cond);
2198- } else {
2199- gen_helper_gvec_3 * const *fn = fns[cond];
2200-
2201- if (fn == NULL) {
2202- uint32_t tmp;
2203- tmp = aofs, aofs = bofs, bofs = tmp;
2204- cond = tcg_swap_cond(cond);
2205- fn = fns[cond];
2206- assert(fn != NULL);
2267+ break;
2268+
2269+ case 0:
2270+ if (vece == MO_64 && check_size_impl(oprsz, 8)) {
2271+ expand_cmp_i64(dofs, aofs, bofs, oprsz, cond);
2272+ } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
2273+ expand_cmp_i32(dofs, aofs, bofs, oprsz, cond);
2274+ } else {
2275+ gen_helper_gvec_3 * const *fn = fns[cond];
2276+
2277+ if (fn == NULL) {
2278+ uint32_t tmp;
2279+ tmp = aofs, aofs = bofs, bofs = tmp;
2280+ cond = tcg_swap_cond(cond);
2281+ fn = fns[cond];
2282+ assert(fn != NULL);
2283+ }
2284+ tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, fn[vece]);
2285+ return;
22072286 }
2208- tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, fn[vece]);
2209- return;
2287+ break;
2288+
2289+ default:
2290+ g_assert_not_reached();
22102291 }
22112292
2212- done:
22132293 if (oprsz < maxsz) {
22142294 expand_clr(dofs + oprsz, maxsz - oprsz);
22152295 }