diff --git a/gcc.spec b/gcc.spec index 7ad9ac7bbeb52f2e26c4ad3d353d025eeb7f4a47..a0056cbd6c6e103bc76306f6ab8d1d02cfd96571 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,7 +1,7 @@ %define anolis_release .0.1 %{?scl_package:%global scl gcc-toolset-12} %global scl_prefix gcc-toolset-12- -BuildRequires: scl-utils-build +BuildRequires: scl-utils-build %global __python /usr/bin/python3 %{?scl:%global __strip %%{_scl_root}/usr/bin/strip} %{?scl:%global __objdump %%{_scl_root}/usr/bin/objdump} @@ -148,13 +148,13 @@ BuildRequires: scl-utils-build %else %global build_annobin_plugin 0 %endif -Summary: GCC version 12 -Name: %{?scl_prefix}gcc -Version: %{gcc_version} -Release: %{gcc_release}.6%{anolis_release}%{?dist} +Summary: GCC version 12 +Name: %{?scl_prefix}gcc +Version: %{gcc_version} +Release: %{gcc_release}.8%{anolis_release}%{?dist} # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. -License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD +License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD # The source for this package was pulled from upstream's vcs. 
# %%{gitrev} is some commit from the # https://gcc.gnu.org/git/?p=gcc.git;h=refs/vendors/redhat/heads/gcc-%%{gcc_major}-branch @@ -163,27 +163,27 @@ License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2 # git --git-dir=gcc-dir.tmp/.git fetch --depth 1 origin %%{gitrev} # git --git-dir=gcc-dir.tmp/.git archive --prefix=%%{name}-%%{version}-%%{DATE}/ %%{gitrev} | xz -9e > %%{name}-%%{version}-%%{DATE}.tar.xz # rm -rf gcc-dir.tmp -Source0: gcc-%{version}-%{DATE}.tar.xz -Source1: https://gcc.gnu.org/pub/gcc/infrastructure/isl-%{isl_version}.tar.bz2 -Source2: http://www.multiprecision.org/mpc/download/mpc-%{mpc_version}.tar.gz -Source3: ftp://ftp.stack.nl/pub/users/dimitri/doxygen-%{doxygen_version}.src.tar.gz +Source0: gcc-%{version}-%{DATE}.tar.xz +Source1: https://gcc.gnu.org/pub/gcc/infrastructure/isl-%{isl_version}.tar.bz2 +Source2: http://www.multiprecision.org/mpc/download/mpc-%{mpc_version}.tar.gz +Source3: ftp://ftp.stack.nl/pub/users/dimitri/doxygen-%{doxygen_version}.src.tar.gz # The source for nvptx-tools package was pulled from upstream's vcs. Use the # following commands to generate the tarball: # git clone --depth 1 git://github.com/MentorEmbedded/nvptx-tools.git nvptx-tools-dir.tmp # git --git-dir=nvptx-tools-dir.tmp/.git fetch --depth 1 origin %%{nvptx_tools_gitrev} # git --git-dir=nvptx-tools-dir.tmp/.git archive --prefix=nvptx-tools-%%{nvptx_tools_gitrev}/ %%{nvptx_tools_gitrev} | xz -9e > nvptx-tools-%%{nvptx_tools_gitrev}.tar.xz # rm -rf nvptx-tools-dir.tmp -Source4: nvptx-tools-%{nvptx_tools_gitrev}.tar.xz +Source4: nvptx-tools-%{nvptx_tools_gitrev}.tar.xz # The source for nvptx-newlib package was pulled from upstream's vcs. 
Use the # following commands to generate the tarball: # git clone git://sourceware.org/git/newlib-cygwin.git newlib-cygwin-dir.tmp # git --git-dir=newlib-cygwin-dir.tmp/.git archive --prefix=newlib-cygwin-%%{newlib_cygwin_gitrev}/ %%{newlib_cygwin_gitrev} ":(exclude)newlib/libc/sys/linux/include/rpc/*.[hx]" | xz -9e > newlib-cygwin-%%{newlib_cygwin_gitrev}.tar.xz # rm -rf newlib-cygwin-dir.tmp -Source5: newlib-cygwin-%{newlib_cygwin_gitrev}.tar.xz -Source6: libgomp_nonshared.c -Source7: http://gcc.gnu.org/pub/gcc/infrastructure/mpfr-%{mpfr_version}.tar.bz2 -Source8: http://gcc.gnu.org/pub/gcc/infrastructure/gmp-%{gmp_version}.tar.bz2 -URL: http://gcc.gnu.org +Source5: newlib-cygwin-%{newlib_cygwin_gitrev}.tar.xz +Source6: libgomp_nonshared.c +Source7: http://gcc.gnu.org/pub/gcc/infrastructure/mpfr-%{mpfr_version}.tar.bz2 +Source8: http://gcc.gnu.org/pub/gcc/infrastructure/gmp-%{gmp_version}.tar.bz2 +URL: http://gcc.gnu.org # Need binutils with -pie support >= 2.14.90.0.4-4 # Need binutils which can omit dot symbols and overlap .opd on ppc64 >= 2.15.91.0.2-4 # Need binutils which handle -msecure-plt on ppc >= 2.16.91.0.2-2 @@ -197,41 +197,41 @@ URL: http://gcc.gnu.org # Need binutils which support -plugin # Need binutils which support .loc view >= 2.30 # Need binutils which support --generate-missing-build-notes=yes >= 2.31 -BuildRequires: %{?scl_prefix}binutils >= 2.31 -BuildRequires: %{?scl_prefix}gdb >= 7.4.50 +BuildRequires: %{?scl_prefix}binutils >= 2.31 +BuildRequires: %{?scl_prefix}gdb >= 7.4.50 # While gcc doesn't include statically linked binaries, during testing # -static is used several times. 
-BuildRequires: glibc-static -BuildRequires: zlib-devel, gettext, dejagnu, bison, flex, sharutils -BuildRequires: texinfo, texinfo-tex, /usr/bin/pod2man +BuildRequires: glibc-static +BuildRequires: zlib-devel, gettext, dejagnu, bison, flex, sharutils +BuildRequires: texinfo, texinfo-tex, /usr/bin/pod2man #BuildRequires: systemtap-sdt-devel >= 1.3 #BuildRequires: gmp-devel >= 4.1.2-8, mpfr-devel >= 3.1.0, libmpc-devel >= 0.8.1 #BuildRequires: python3-devel, /usr/bin/python -BuildRequires: gcc, gcc-c++, make +BuildRequires: gcc, gcc-c++, make %if 0%{?rhel} == 7 -BuildRequires: python3 +BuildRequires: python3 %endif # For VTA guality testing -BuildRequires: gdb +BuildRequires: gdb # Make sure pthread.h doesn't contain __thread tokens # Make sure glibc supports stack protector # Make sure glibc supports DT_GNU_HASH -BuildRequires: glibc-devel >= 2.4.90-13 -BuildRequires: elfutils-devel >= 0.147 -BuildRequires: elfutils-libelf-devel >= 0.147 +BuildRequires: glibc-devel >= 2.4.90-13 +BuildRequires: elfutils-devel >= 0.147 +BuildRequires: elfutils-libelf-devel >= 0.147 %if 0%{?rhel} >= 8 -BuildRequires: libzstd-devel +BuildRequires: libzstd-devel %endif %ifarch ppc ppc64 ppc64le ppc64p7 s390 s390x sparc sparcv9 alpha # Make sure glibc supports TFmode long double -BuildRequires: glibc >= 2.3.90-35 +BuildRequires: glibc >= 2.3.90-35 %endif %ifarch %{multilib_64_archs} sparcv9 ppc # Ensure glibc{,-devel} is installed for both multilib arches -BuildRequires: /lib/libc.so.6 /usr/lib/libc.so /lib64/libc.so.6 /usr/lib64/libc.so +BuildRequires: /lib/libc.so.6 /usr/lib/libc.so /lib64/libc.so.6 /usr/lib64/libc.so %endif %ifarch ia64 -BuildRequires: libunwind >= 0.98 +BuildRequires: libunwind >= 0.98 %endif # Need .eh_frame ld optimizations # Need proper visibility support @@ -249,55 +249,55 @@ BuildRequires: libunwind >= 0.98 # Need binutils that support -plugin # Need binutils that support .loc view >= 2.30 # Need binutils which support --generate-missing-build-notes=yes >= 2.31 
-Requires: %{?scl_prefix}binutils >= 2.22.52.0.1 +Requires: %{?scl_prefix}binutils >= 2.22.52.0.1 # Make sure gdb will understand DW_FORM_strp -Conflicts: gdb < 5.1-2 -Requires: glibc-devel >= 2.2.90-12 +Conflicts: gdb < 5.1-2 +Requires: glibc-devel >= 2.2.90-12 %ifarch ppc ppc64 ppc64le ppc64p7 s390 s390x sparc sparcv9 alpha # Make sure glibc supports TFmode long double -Requires: glibc >= 2.3.90-35 +Requires: glibc >= 2.3.90-35 %endif %if 0%{?rhel} >= 7 -BuildRequires: gmp-devel >= 4.3.2 -BuildRequires: mpfr-devel >= 3.1.0 -BuildRequires: libmpc-devel >= 0.8.1 +BuildRequires: gmp-devel >= 4.3.2 +BuildRequires: mpfr-devel >= 3.1.0 +BuildRequires: libmpc-devel >= 0.8.1 %endif %if %{build_libstdcxx_docs} -BuildRequires: libxml2 -BuildRequires: graphviz +BuildRequires: libxml2 +BuildRequires: graphviz %if 0%{?rhel} < 7 # doxygen BRs -BuildRequires: perl -BuildRequires: texlive-dvips, texlive-utils, texlive-latex -BuildRequires: ghostscript +BuildRequires: perl +BuildRequires: texlive-dvips, texlive-utils, texlive-latex +BuildRequires: ghostscript %endif %if 0%{?rhel} >= 7 -BuildRequires: doxygen >= 1.7.1 -BuildRequires: dblatex, texlive-collection-latex, docbook-style-xsl +BuildRequires: doxygen >= 1.7.1 +BuildRequires: dblatex, texlive-collection-latex, docbook-style-xsl %endif %endif # See the build section for why this is needed. %if 0%{?rhel} == 6 -BuildRequires: devtoolset-11-runtime devtoolset-11-binutils -BuildRequires: devtoolset-11-gcc devtoolset-11-gcc-c++ +BuildRequires: devtoolset-11-runtime devtoolset-11-binutils +BuildRequires: devtoolset-11-gcc devtoolset-11-gcc-c++ %endif -Requires: libgcc >= 4.1.2-43 -Requires: libgomp >= 4.4.4-13 +Requires: libgcc >= 4.1.2-43 +Requires: libgomp >= 4.4.4-13 # lto-wrapper invokes make -Requires: make +Requires: make %{?scl:Requires:%scl_runtime} -AutoReq: true +AutoReq: true # Various libraries are imported. #1859893 asks us to list them all. 
-Provides: bundled(libiberty) -Provides: bundled(libbacktrace) -Provides: bundled(libffi) -Provides: gcc(major) = %{gcc_major} +Provides: bundled(libiberty) +Provides: bundled(libbacktrace) +Provides: bundled(libffi) +Provides: gcc(major) = %{gcc_major} %ifarch sparc64 ppc64 ppc64le s390x x86_64 ia64 aarch64 -Provides: liblto_plugin.so.0()(64bit) +Provides: liblto_plugin.so.0()(64bit) %else -Provides: liblto_plugin.so.0 +Provides: liblto_plugin.so.0 %endif %global oformat %{nil} %global oformat2 %{nil} @@ -333,63 +333,66 @@ Provides: liblto_plugin.so.0 %global oformat OUTPUT_FORMAT(elf64-littleaarch64) %endif %if 0%{?rhel} == 6 -ExclusiveArch: x86_64 %{ix86} +ExclusiveArch: x86_64 %{ix86} %endif %if 0%{?rhel} == 7 -ExcludeArch: aarch64 -%endif - -Patch0: gcc12-hack.patch -Patch2: gcc12-sparc-config-detection.patch -Patch3: gcc12-libgomp-omp_h-multilib.patch -Patch4: gcc12-libtool-no-rpath.patch -Patch5: gcc12-isl-dl.patch -Patch6: gcc12-isl-dl2.patch -Patch7: gcc12-libstdc++-docs.patch -Patch8: gcc12-no-add-needed.patch -Patch9: gcc12-Wno-format-security.patch -Patch10: gcc12-rh1574936.patch -Patch11: gcc12-d-shared-libphobos.patch -Patch12: gcc12-pr107468.patch -Patch15: gcc12-static-libquadmath.patch -Patch16: gcc12-FMA-chains.patch -Patch17: gcc12-pr113960.patch - -Patch100: gcc12-fortran-fdec-duplicates.patch -Patch101: gcc12-fortran-flogical-as-integer.patch -Patch102: gcc12-fortran-fdec-override-kind.patch -Patch103: gcc12-fortran-fdec-non-logical-if.patch - -Patch1000: gcc12-libstdc++-compat.patch -Patch1001: gcc12-alt-compat-test.patch -Patch1002: gcc12-libgfortran-compat.patch - -Patch2001: doxygen-1.7.1-config.patch -Patch2002: doxygen-1.7.5-timestamp.patch -Patch2003: doxygen-1.8.0-rh856725.patch -Patch2004: isl-rh2155127.patch - -Patch3000: 0001-basic_string-reserve-n-semantics-are-not-available-i.patch -Patch3001: 0004-operator-istream-char-N-eofbit-fixes-are-not-availab.patch -Patch3002: 0005-Disable-tests-for-PR-libstdc-79820-and-PR-libstdc-81.patch 
-Patch3003: 0006-Don-t-assume-has_facet-codecvt_c16-when-run-against-.patch -Patch3004: 0008-testsuite-build-plugins-with-std-c-11.patch -Patch3005: 0009-Fix-22_locale-locale-cons-unicode.cc-when-run-under-.patch -Patch3006: 0010-Don-t-verify-exception-handling-in-basic_filebuf-clo.patch -Patch3007: 0011-Add-dts.exp-and-use-it-to-fix-22_locale-messages-136.patch -Patch3008: 0012-dts.exp-use-usr-bin-gcc.patch -Patch3009: 0013-Rename-__CXXSTDLIB_SO_VERSION__-to-__LIBSTDCXX_SO_VE.patch -Patch3010: 0014-Conditionalize-tests-for-PR-libstdc-98466-on-__LIBST.patch -Patch3011: 0015-Conditionalize-test-for-PR-libstdc-87135-on-__LIBSTD.patch -Patch3012: 0016-Conditionalize-test-for-hashtable-bucket-sizes-on-__.patch -Patch3013: 0017-Conditionalize-test-for-PR-libstdc-71181-on-__LIBSTD.patch -Patch3014: gcc12-dg-ice-fixes.patch -Patch3015: 0018-Use-CXX11-ABI.patch -Patch3016: 0019-xfails.patch -Patch3017: 0020-more-fixes.patch -Patch3018: 0021-libstdc++-disable-tests.patch -Patch3019: 0022-libstdc++-revert-behavior.patch -Patch3020: gcc12-testsuite-typo.patch +ExcludeArch: aarch64 +%endif + +Patch0: gcc12-hack.patch +Patch2: gcc12-sparc-config-detection.patch +Patch3: gcc12-libgomp-omp_h-multilib.patch +Patch4: gcc12-libtool-no-rpath.patch +Patch5: gcc12-isl-dl.patch +Patch6: gcc12-isl-dl2.patch +Patch7: gcc12-libstdc++-docs.patch +Patch8: gcc12-no-add-needed.patch +Patch9: gcc12-Wno-format-security.patch +Patch10: gcc12-rh1574936.patch +Patch11: gcc12-d-shared-libphobos.patch +Patch12: gcc12-pr107468.patch +Patch15: gcc12-static-libquadmath.patch +Patch16: gcc12-FMA-chains.patch +Patch17: gcc12-pr113960.patch +Patch18: gcc12-vector-merge-1.patch +Patch19: gcc12-vector-merge-2.patch +Patch20: gcc12-vector-merge-3.patch + +Patch100: gcc12-fortran-fdec-duplicates.patch +Patch101: gcc12-fortran-flogical-as-integer.patch +Patch102: gcc12-fortran-fdec-override-kind.patch +Patch103: gcc12-fortran-fdec-non-logical-if.patch + +Patch1000: gcc12-libstdc++-compat.patch +Patch1001: 
gcc12-alt-compat-test.patch +Patch1002: gcc12-libgfortran-compat.patch + +Patch2001: doxygen-1.7.1-config.patch +Patch2002: doxygen-1.7.5-timestamp.patch +Patch2003: doxygen-1.8.0-rh856725.patch +Patch2004: isl-rh2155127.patch + +Patch3000: 0001-basic_string-reserve-n-semantics-are-not-available-i.patch +Patch3001: 0004-operator-istream-char-N-eofbit-fixes-are-not-availab.patch +Patch3002: 0005-Disable-tests-for-PR-libstdc-79820-and-PR-libstdc-81.patch +Patch3003: 0006-Don-t-assume-has_facet-codecvt_c16-when-run-against-.patch +Patch3004: 0008-testsuite-build-plugins-with-std-c-11.patch +Patch3005: 0009-Fix-22_locale-locale-cons-unicode.cc-when-run-under-.patch +Patch3006: 0010-Don-t-verify-exception-handling-in-basic_filebuf-clo.patch +Patch3007: 0011-Add-dts.exp-and-use-it-to-fix-22_locale-messages-136.patch +Patch3008: 0012-dts.exp-use-usr-bin-gcc.patch +Patch3009: 0013-Rename-__CXXSTDLIB_SO_VERSION__-to-__LIBSTDCXX_SO_VE.patch +Patch3010: 0014-Conditionalize-tests-for-PR-libstdc-98466-on-__LIBST.patch +Patch3011: 0015-Conditionalize-test-for-PR-libstdc-87135-on-__LIBSTD.patch +Patch3012: 0016-Conditionalize-test-for-hashtable-bucket-sizes-on-__.patch +Patch3013: 0017-Conditionalize-test-for-PR-libstdc-71181-on-__LIBSTD.patch +Patch3014: gcc12-dg-ice-fixes.patch +Patch3015: 0018-Use-CXX11-ABI.patch +Patch3016: 0019-xfails.patch +Patch3017: 0020-more-fixes.patch +Patch3018: 0021-libstdc++-disable-tests.patch +Patch3019: 0022-libstdc++-revert-behavior.patch +Patch3020: gcc12-testsuite-typo.patch Patch5001: 0025-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch Patch5002: 0026-Enable-small-loop-unrolling-for-O2.patch Patch5003: 0027-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch @@ -426,23 +429,23 @@ Patch5003: 0027-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch The %{?scl_prefix}gcc%{!?scl:12} package contains the GNU Compiler Collection version 10. 
%package -n libgcc -Summary: GCC version 12 shared support library -Autoreq: false +Summary: GCC version 12 shared support library +Autoreq: false %description -n libgcc This package contains GCC shared support library which is needed e.g. for exception handling support. %package c++ -Summary: C++ support for GCC version 12 -Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} +Summary: C++ support for GCC version 12 +Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} %if 0%{?rhel} >= 7 -Requires: libstdc++ +Requires: libstdc++ %else -Requires: libstdc++ >= 4.4.4-13 +Requires: libstdc++ >= 4.4.4-13 %endif -Requires: %{?scl_prefix}libstdc++%{!?scl:12}-devel = %{version}-%{release} -Autoreq: true +Requires: %{?scl_prefix}libstdc++%{!?scl:12}-devel = %{version}-%{release} +Autoreq: true %description c++ This package adds C++ support to the GNU Compiler Collection @@ -450,23 +453,23 @@ version 12. It includes support for most of the current C++ specification and a lot of support for the upcoming C++ specification. %package -n libstdc++ -Summary: GNU Standard C++ Library -Autoreq: true -Requires: glibc >= 2.10.90-7 +Summary: GNU Standard C++ Library +Autoreq: true +Requires: glibc >= 2.10.90-7 %description -n libstdc++ The libstdc++ package contains a rewritten standard compliant GCC Standard C++ Library. %package -n %{?scl_prefix}libstdc++%{!?scl:12}-devel -Summary: Header files and libraries for C++ development +Summary: Header files and libraries for C++ development %if 0%{?rhel} >= 7 -Requires: libstdc++ +Requires: libstdc++ %else -Requires: libstdc++ >= 4.4.4-13 +Requires: libstdc++ >= 4.4.4-13 %endif -Requires: libstdc++%{?_isa} -Autoreq: true +Requires: libstdc++%{?_isa} +Autoreq: true %description -n %{?scl_prefix}libstdc++%{!?scl:12}-devel This is the GNU implementation of the standard C++ libraries. This @@ -474,30 +477,30 @@ package includes the header files and libraries needed for C++ development. 
This includes rewritten implementation of STL. %package -n %{?scl_prefix}libstdc++%{!?scl:12}-docs -Summary: Documentation for the GNU standard C++ library -Autoreq: true +Summary: Documentation for the GNU standard C++ library +Autoreq: true %description -n %{?scl_prefix}libstdc++%{!?scl:12}-docs Manual, doxygen generated API information and Frequently Asked Questions for the GNU standard C++ library. %package gfortran -Summary: Fortran support for GCC 12 -Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} +Summary: Fortran support for GCC 12 +Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} %if 0%{?rhel} > 7 -Requires: libgfortran >= 8.1.1 +Requires: libgfortran >= 8.1.1 %else -Requires: libgfortran5 >= 8.1.1 +Requires: libgfortran5 >= 8.1.1 %endif -Autoreq: true +Autoreq: true %if %{build_libquadmath} %if 0%{!?scl:1} -Requires: libquadmath +Requires: libquadmath %endif -Requires: %{?scl_prefix}libquadmath-devel = %{version}-%{release} +Requires: %{?scl_prefix}libquadmath-devel = %{version}-%{release} %endif -Autoreq: true +Autoreq: true %description gfortran The %{?scl_prefix}gcc%{!?scl:10}-gfortran package provides support for compiling Fortran @@ -505,35 +508,35 @@ programs with the GNU Compiler Collection. %package gdb-plugin -Summary: GCC 12 plugin for GDB -Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} +Summary: GCC 12 plugin for GDB +Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} %description gdb-plugin This package contains GCC 12 plugin for GDB C expression evaluation. %package -n %{?scl_prefix}libgccjit -Summary: Library for embedding GCC inside programs and libraries -Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} +Summary: Library for embedding GCC inside programs and libraries +Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} %description -n %{?scl_prefix}libgccjit This package contains shared library with GCC 12 JIT front-end. 
%package -n %{?scl_prefix}libgccjit-devel -Summary: Support for embedding GCC inside programs and libraries -Group: Development/Libraries -Requires: %{?scl_prefix}libgccjit = %{version}-%{release} -Requires: %{?scl_prefix}libgccjit-docs = %{version}-%{release} +Summary: Support for embedding GCC inside programs and libraries +Group: Development/Libraries +Requires: %{?scl_prefix}libgccjit = %{version}-%{release} +Requires: %{?scl_prefix}libgccjit-docs = %{version}-%{release} %description -n %{?scl_prefix}libgccjit-devel This package contains header files for GCC 12 JIT front end. %package -n %{?scl_prefix}libgccjit-docs -Summary: Documentation for embedding GCC inside programs and libraries -Group: Development/Libraries +Summary: Documentation for embedding GCC inside programs and libraries +Group: Development/Libraries %if 0%{?rhel} > 7 -BuildRequires: python3-sphinx +BuildRequires: python3-sphinx %else -BuildRequires: python-sphinx +BuildRequires: python-sphinx %endif Requires(post): /sbin/install-info Requires(preun): /sbin/install-info @@ -542,7 +545,7 @@ Requires(preun): /sbin/install-info This package contains documentation for GCC 12 JIT front-end. %package -n libquadmath -Summary: GCC 12 __float128 shared support library +Summary: GCC 12 __float128 shared support library Requires(post): /sbin/install-info Requires(preun): /sbin/install-info @@ -551,24 +554,24 @@ This package contains GCC shared support library which is needed for __float128 math support and for Fortran REAL*16 support. 
%package -n %{?scl_prefix}libquadmath-devel -Summary: GCC 12 __float128 support -Group: Development/Libraries +Summary: GCC 12 __float128 support +Group: Development/Libraries %if 0%{!?scl:1} -Requires: %{?scl_prefix}libquadmath%{_isa} = %{version}-%{release} +Requires: %{?scl_prefix}libquadmath%{_isa} = %{version}-%{release} %else %if 0%{?rhel} >= 7 -Requires: libquadmath%{_isa} +Requires: libquadmath%{_isa} %endif %endif -Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} +Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} %description -n %{?scl_prefix}libquadmath-devel This package contains headers for building Fortran programs using REAL*16 and programs using __float128 math. %package -n libitm -Summary: The GNU Transactional Memory library -Group: System Environment/Libraries +Summary: The GNU Transactional Memory library +Group: System Environment/Libraries Requires(post): /sbin/install-info Requires(preun): /sbin/install-info @@ -577,21 +580,21 @@ This package contains the GNU Transactional Memory library which is a GCC transactional memory support runtime library. %package -n %{?scl_prefix}libitm-devel -Summary: The GNU Transactional Memory support -Requires: libitm%{_isa} >= 4.7.0-1 -Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} +Summary: The GNU Transactional Memory support +Requires: libitm%{_isa} >= 4.7.0-1 +Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} %description -n %{?scl_prefix}libitm-devel This package contains headers and support files for the GNU Transactional Memory library. 
%package plugin-devel -Summary: Support for compiling GCC plugins -Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} +Summary: Support for compiling GCC plugins +Requires: %{?scl_prefix}gcc%{!?scl:12} = %{version}-%{release} %if 0%{?rhel} >= 7 -Requires: gmp-devel >= 4.3.2 -Requires: mpfr-devel >= 3.1.0 -Requires: libmpc-devel >= 0.8.1 +Requires: gmp-devel >= 4.3.2 +Requires: mpfr-devel >= 3.1.0 +Requires: libmpc-devel >= 0.8.1 %endif %description plugin-devel @@ -600,8 +603,8 @@ for compiling GCC 12 plugins. The GCC plugin ABI is currently not stable, so plugins must be rebuilt any time GCC is updated. %package -n libatomic -Summary: The GNU Atomic library -Group: System Environment/Libraries +Summary: The GNU Atomic library +Group: System Environment/Libraries Requires(post): /sbin/install-info Requires(preun): /sbin/install-info @@ -611,15 +614,15 @@ which is a GCC support runtime library for atomic operations not supported by hardware. %package -n %{?scl_prefix}libatomic-devel -Summary: The GNU Atomic static library -Requires: libatomic%{_isa} >= 4.8.0 +Summary: The GNU Atomic static library +Requires: libatomic%{_isa} >= 4.8.0 %description -n %{?scl_prefix}libatomic-devel This package contains GNU Atomic static libraries. %package -n libasan8 -Summary: The Address Sanitizer runtime library from GCC 12 -Group: System Environment/Libraries +Summary: The Address Sanitizer runtime library from GCC 12 +Group: System Environment/Libraries Requires(post): /sbin/install-info Requires(preun): /sbin/install-info @@ -628,15 +631,15 @@ This package contains the Address Sanitizer library from GCC 12 which is used for -fsanitize=address instrumented programs. 
%package -n %{?scl_prefix}libasan-devel -Summary: The Address Sanitizer static library -Requires: libasan8%{_isa} >= 12.1.1 -Obsoletes: libasan5 <= 8.3.1 +Summary: The Address Sanitizer static library +Requires: libasan8%{_isa} >= 12.1.1 +Obsoletes: libasan5 <= 8.3.1 %description -n %{?scl_prefix}libasan-devel This package contains Address Sanitizer static runtime library. %package -n libtsan2 -Summary: The Thread Sanitizer runtime library +Summary: The Thread Sanitizer runtime library Requires(post): /sbin/install-info Requires(preun): /sbin/install-info @@ -645,14 +648,14 @@ This package contains the Thread Sanitizer library which is used for -fsanitize=thread instrumented programs. %package -n %{?scl_prefix}libtsan-devel -Summary: The Thread Sanitizer static library -Requires: libtsan2%{_isa} >= 12.1.1 +Summary: The Thread Sanitizer static library +Requires: libtsan2%{_isa} >= 12.1.1 %description -n %{?scl_prefix}libtsan-devel This package contains Thread Sanitizer static runtime library. %package -n libubsan1 -Summary: The Undefined Behavior Sanitizer runtime library +Summary: The Undefined Behavior Sanitizer runtime library Requires(post): /sbin/install-info Requires(preun): /sbin/install-info @@ -661,19 +664,19 @@ This package contains the Undefined Behavior Sanitizer library which is used for -fsanitize=undefined instrumented programs. %package -n %{?scl_prefix}libubsan-devel -Summary: The Undefined Behavior Sanitizer static library +Summary: The Undefined Behavior Sanitizer static library %if 0%{?rhel} > 7 -Requires: libubsan%{_isa} >= 8.3.1 -Obsoletes: libubsan1 <= 8.3.1 +Requires: libubsan%{_isa} >= 8.3.1 +Obsoletes: libubsan1 <= 8.3.1 %else -Requires: libubsan1%{_isa} >= 8.3.1 +Requires: libubsan1%{_isa} >= 8.3.1 %endif %description -n %{?scl_prefix}libubsan-devel This package contains Undefined Behavior Sanitizer static runtime library. 
%package -n liblsan -Summary: The Leak Sanitizer runtime library +Summary: The Leak Sanitizer runtime library Requires(post): /sbin/install-info Requires(preun): /sbin/install-info @@ -682,16 +685,16 @@ This package contains the Leak Sanitizer library which is used for -fsanitize=leak instrumented programs. %package -n %{?scl_prefix}liblsan-devel -Summary: The Leak Sanitizer static library -Requires: liblsan%{_isa} >= 5.1.1 +Summary: The Leak Sanitizer static library +Requires: liblsan%{_isa} >= 5.1.1 %description -n %{?scl_prefix}liblsan-devel This package contains Leak Sanitizer static runtime library. %package -n %{?scl_prefix}offload-nvptx -Summary: Offloading compiler to NVPTX -Requires: gcc >= 8.3.1 -Requires: libgomp-offload-nvptx >= 8.3.1 +Summary: Offloading compiler to NVPTX +Requires: gcc >= 8.3.1 +Requires: libgomp-offload-nvptx >= 8.3.1 %description -n %{?scl_prefix}offload-nvptx The gcc-offload-nvptx package provides offloading support for @@ -701,9 +704,9 @@ to NVidia PTX capable devices if available. %if %{build_annobin_plugin} %package -n %{?scl_prefix}gcc-plugin-annobin -Summary: The annobin plugin for gcc, built by the installed version of gcc -Requires: %{?scl_prefix}gcc = %{version}-%{release} -BuildRequires: rpm-devel, binutils-devel, xz +Summary: The annobin plugin for gcc, built by the installed version of gcc +Requires: %{?scl_prefix}gcc = %{version}-%{release} +BuildRequires: rpm-devel, binutils-devel, xz %description -n %{?scl_prefix}gcc-plugin-annobin This package adds a version of the annobin plugin for gcc. This version @@ -738,6 +741,9 @@ so that there cannot be any synchronization problems. 
%patch15 -p0 -b .static-libquadmath~ %patch16 -p1 -b .fma~ %patch17 -p1 -b .pr113960~ +%patch18 -p1 -b .vector-merge-1~ +%patch19 -p1 -b .vector-merge-2~ +%patch20 -p1 -b .vector-merge-3~ %if 0%{?rhel} >= 6 %patch100 -p1 -b .fortran-fdec-duplicates~ @@ -3000,11 +3006,17 @@ fi %endif %changelog -* Wed Jul 17 2024 Haochen Jiang 12.2.1-7.6.0.1 +* Tue Aug 27 2024 Haochen Jiang 12.2.1-7.8.0.1 - Add attribute hot judgement for INLINE_HINT_known_hot hint - Enable small loop unrolling for O2 - i386: Only enable small loop unrolling in backend [PR 107692] +* Mon Jul 15 2024 Marek Polacek 12.2.1-7.8 +- bump NVR (RHEL-45189) + +* Fri Jul 12 2024 Marek Polacek 12.2.1-7.7 +- fix wrong RTL patterns for vector merge high/low word on LE (RHEL-45189) + * Wed Apr 3 2024 Marek Polacek 12.2.1-7.6 - bump NVR (RHEL-31253) diff --git a/gcc12-vector-merge-1.patch b/gcc12-vector-merge-1.patch new file mode 100644 index 0000000000000000000000000000000000000000..f3d6ff95c0491f2aceaa9dcbfad89f7760de5e94 --- /dev/null +++ b/gcc12-vector-merge-1.patch @@ -0,0 +1,522 @@ +commit 96ef3367067219c8e3eb88c0474a1090cc7749b4 +Author: Kewen Lin +Date: Thu Jun 20 20:23:56 2024 -0500 + + rs6000: Fix wrong RTL patterns for vector merge high/low word on LE + + Commit r12-4496 changes some define_expands and define_insns + for vector merge high/low word, which are altivec_vmrg[hl]w, + vsx_xxmrg[hl]w_<mode>. These defines are mainly for + built-in function vec_merge{h,l}, __builtin_vsx_xxmrghw, + __builtin_vsx_xxmrghw_4si and some internal gen function + needs. These functions should consider endianness, taking + vec_mergeh as example, as PVIPR defines, vec_mergeh "Merges + the first halves (in element order) of two vectors", it does + note it's in element order. So it's mapped into vmrghw on + BE while vmrglw on LE respectively. 
Although the mapped + insns are different, as the discussion in PR106069, the RTL + pattern should be still the same, it is conformed before + commit r12-4496, define_expand altivec_vmrghw got expanded + into: + + (vec_select:VSX_W + (vec_concat:<VS_double> + (match_operand:VSX_W 1 "register_operand" "wa,v") + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + + on both BE and LE then. But commit r12-4496 changed it to + expand into: + + (vec_select:VSX_W + (vec_concat:<VS_double> + (match_operand:VSX_W 1 "register_operand" "wa,v") + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + + on BE, and + + (vec_select:VSX_W + (vec_concat:<VS_double> + (match_operand:VSX_W 1 "register_operand" "wa,v") + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + + on LE, although the mapped insn are still vmrghw on BE and + vmrglw on LE, the associated RTL pattern is completely + wrong and inconsistent with the mapped insn. If optimization + passes leave this pattern alone, even if its pattern doesn't + represent its mapped insn, it's still fine, that's why simple + testing on bif doesn't expose this issue. But once some + optimization pass such as combine does some changes basing + on this wrong pattern, because the pattern doesn't match the + semantics that the expanded insn is intended to represent, + it would cause the unexpected result. + + So this patch is to fix the wrong RTL pattern, ensure the + associated RTL patterns become the same as before which can + have the same semantic as their mapped insns. 
With the + proposed patch, the expanders like altivec_vmrghw expands + into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le + depending on endianness, "direct" can easily show which + insn would be generated, _be and _le are mainly for the + different RTL patterns as endianness. + + Co-authored-by: Xionghu Luo + + PR target/106069 + PR target/115355 + + gcc/ChangeLog: + + * config/rs6000/altivec.md (altivec_vmrghw_direct_<mode>): Rename + to ... + (altivec_vmrghw_direct_<mode>_be): ... this. Add the condition + BYTES_BIG_ENDIAN. + (altivec_vmrghw_direct_<mode>_le): New define_insn. + (altivec_vmrglw_direct_<mode>): Rename to ... + (altivec_vmrglw_direct_<mode>_be): ... this. Add the condition + BYTES_BIG_ENDIAN. + (altivec_vmrglw_direct_<mode>_le): New define_insn. + (altivec_vmrghw): Adjust by calling gen_altivec_vmrghw_direct_v4si_be + for BE and gen_altivec_vmrglw_direct_v4si_le for LE. + (altivec_vmrglw): Adjust by calling gen_altivec_vmrglw_direct_v4si_be + for BE and gen_altivec_vmrghw_direct_v4si_le for LE. + (vec_widen_umult_hi_v8hi): Adjust the call to + gen_altivec_vmrghw_direct_v4si by gen_altivec_vmrghw for BE + and by gen_altivec_vmrglw for LE. + (vec_widen_smult_hi_v8hi): Likewise. + (vec_widen_umult_lo_v8hi): Adjust the call to + gen_altivec_vmrglw_direct_v4si by gen_altivec_vmrglw for BE + and by gen_altivec_vmrghw for LE + (vec_widen_smult_lo_v8hi): Likewise. + * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace + CODE_FOR_altivec_vmrghw_direct_v4si by + CODE_FOR_altivec_vmrghw_direct_v4si_be for BE and + CODE_FOR_altivec_vmrghw_direct_v4si_le for LE. And replace + CODE_FOR_altivec_vmrglw_direct_v4si by + CODE_FOR_altivec_vmrglw_direct_v4si_be for BE and + CODE_FOR_altivec_vmrglw_direct_v4si_le for LE. + * config/rs6000/vsx.md (vsx_xxmrghw_<mode>): Adjust by calling + gen_altivec_vmrghw_direct_v4si_be for BE and + gen_altivec_vmrglw_direct_v4si_le for LE. 
+ (vsx_xxmrglw_): Adjust by calling + gen_altivec_vmrglw_direct_v4si_be for BE and + gen_altivec_vmrghw_direct_v4si_le for LE. + + gcc/testsuite/ChangeLog: + + * g++.target/powerpc/pr106069.C: New test. + * gcc.target/powerpc/pr115355.c: New test. + + (cherry picked from commit 52c112800d9f44457c4832309a48c00945811313) + +diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md +index 3849db5ca3c..0c408a9e839 100644 +--- a/gcc/config/rs6000/altivec.md ++++ b/gcc/config/rs6000/altivec.md +@@ -1212,16 +1212,18 @@ (define_expand "altivec_vmrghw" + (use (match_operand:V4SI 2 "register_operand"))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_v4si +- : gen_altivec_vmrglw_direct_v4si; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrghw_direct_" ++(define_insn "altivec_vmrghw_direct__be" + [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") + (vec_select:VSX_W + (vec_concat: +@@ -1229,7 +1231,21 @@ (define_insn "altivec_vmrghw_direct_" + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "@ ++ xxmrghw %x0,%x1,%x2 ++ vmrghw %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrghw_direct__le" ++ [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") ++ (vec_select:VSX_W ++ (vec_concat: ++ (match_operand:VSX_W 2 "register_operand" "wa,v") ++ (match_operand:VSX_W 1 "register_operand" "wa,v")) ++ (parallel [(const_int 2) (const_int 6) ++ (const_int 3) (const_int 7)])))] ++ 
"TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "@ + xxmrghw %x0,%x1,%x2 + vmrghw %0,%1,%2" +@@ -1318,16 +1334,18 @@ (define_expand "altivec_vmrglw" + (use (match_operand:V4SI 2 "register_operand"))] + "VECTOR_MEM_ALTIVEC_P (V4SImode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_v4si +- : gen_altivec_vmrghw_direct_v4si; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrglw_direct_" ++(define_insn "altivec_vmrglw_direct__be" + [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") + (vec_select:VSX_W + (vec_concat: +@@ -1335,7 +1353,21 @@ (define_insn "altivec_vmrglw_direct_" + (match_operand:VSX_W 2 "register_operand" "wa,v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "@ ++ xxmrglw %x0,%x1,%x2 ++ vmrglw %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrglw_direct__le" ++ [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") ++ (vec_select:VSX_W ++ (vec_concat: ++ (match_operand:VSX_W 2 "register_operand" "wa,v") ++ (match_operand:VSX_W 1 "register_operand" "wa,v")) ++ (parallel [(const_int 0) (const_int 4) ++ (const_int 1) (const_int 5)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "@ + xxmrglw %x0,%x1,%x2 + vmrglw %0,%1,%2" +@@ -3807,13 +3839,13 @@ (define_expand "vec_widen_umult_hi_v8hi" + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + else + { + 
emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + DONE; + }) +@@ -3832,13 +3864,13 @@ (define_expand "vec_widen_umult_lo_v8hi" + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + DONE; + }) +@@ -3857,13 +3889,13 @@ (define_expand "vec_widen_smult_hi_v8hi" + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + DONE; + }) +@@ -3882,13 +3914,13 @@ (define_expand "vec_widen_smult_lo_v8hi" + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); +- emit_insn 
(gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } + DONE; + }) +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index f5db6436dfa..23b553131a9 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ b/gcc/config/rs6000/rs6000.cc +@@ -22979,8 +22979,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrglh_direct, + {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si +- : CODE_FOR_altivec_vmrglw_direct_v4si, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be ++ : CODE_FOR_altivec_vmrglw_direct_v4si_le, + {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct +@@ -22991,8 +22991,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrghh_direct, + {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si +- : CODE_FOR_altivec_vmrghw_direct_v4si, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be ++ : CODE_FOR_altivec_vmrghw_direct_v4si_le, + {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}}, + {OPTION_MASK_P8_VECTOR, + BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct +diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md +index e16f893c073..226a1049917 100644 +--- a/gcc/config/rs6000/vsx.md ++++ b/gcc/config/rs6000/vsx.md +@@ -4694,12 +4694,14 @@ (define_expand "vsx_xxmrghw_" + (const_int 1) (const_int 5)])))] + "VECTOR_MEM_VSX_P (mode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? 
gen_altivec_vmrghw_direct_ +- : gen_altivec_vmrglw_direct_; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + } + [(set_attr "type" "vecperm")]) +@@ -4714,12 +4716,14 @@ (define_expand "vsx_xxmrglw_" + (const_int 3) (const_int 7)])))] + "VECTOR_MEM_VSX_P (mode)" + { +- rtx (*fun) (rtx, rtx, rtx); +- fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_ +- : gen_altivec_vmrghw_direct_; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0], ++ operands[1], ++ operands[2])); ++ else ++ emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0], ++ operands[2], ++ operands[1])); + DONE; + } + [(set_attr "type" "vecperm")]) +diff --git a/gcc/testsuite/g++.target/powerpc/pr106069.C b/gcc/testsuite/g++.target/powerpc/pr106069.C +new file mode 100644 +index 00000000000..537207d2fe8 +--- /dev/null ++++ b/gcc/testsuite/g++.target/powerpc/pr106069.C +@@ -0,0 +1,119 @@ ++/* { dg-options "-O -fno-tree-forwprop -maltivec" } */ ++/* { dg-require-effective-target vmx_hw } */ ++/* { dg-do run } */ ++ ++typedef __attribute__ ((altivec (vector__))) unsigned native_simd_type; ++ ++union ++{ ++ native_simd_type V; ++ int R[4]; ++} store_le_vec; ++ ++struct S ++{ ++ S () = default; ++ S (unsigned B0) ++ { ++ native_simd_type val{B0}; ++ m_simd = val; ++ } ++ void store_le (unsigned int out[]) ++ { ++ store_le_vec.V = m_simd; ++ unsigned int x0 = store_le_vec.R[0]; ++ __builtin_memcpy (out, &x0, 4); ++ } ++ S rotl (unsigned int r) ++ { ++ native_simd_type rot{r}; ++ return __builtin_vec_rl (m_simd, rot); ++ } ++ void operator+= 
(S other) ++ { ++ m_simd = __builtin_vec_add (m_simd, other.m_simd); ++ } ++ void operator^= (S other) ++ { ++ m_simd = __builtin_vec_xor (m_simd, other.m_simd); ++ } ++ static void transpose (S &B0, S B1, S B2, S B3) ++ { ++ native_simd_type T0 = __builtin_vec_mergeh (B0.m_simd, B2.m_simd); ++ native_simd_type T1 = __builtin_vec_mergeh (B1.m_simd, B3.m_simd); ++ native_simd_type T2 = __builtin_vec_mergel (B0.m_simd, B2.m_simd); ++ native_simd_type T3 = __builtin_vec_mergel (B1.m_simd, B3.m_simd); ++ B0 = __builtin_vec_mergeh (T0, T1); ++ B3 = __builtin_vec_mergel (T2, T3); ++ } ++ S (native_simd_type x) : m_simd (x) {} ++ native_simd_type m_simd; ++}; ++ ++void ++foo (unsigned int output[], unsigned state[]) ++{ ++ S R00 = state[0]; ++ S R01 = state[0]; ++ S R02 = state[2]; ++ S R03 = state[0]; ++ S R05 = state[5]; ++ S R06 = state[6]; ++ S R07 = state[7]; ++ S R08 = state[8]; ++ S R09 = state[9]; ++ S R10 = state[10]; ++ S R11 = state[11]; ++ S R12 = state[12]; ++ S R13 = state[13]; ++ S R14 = state[4]; ++ S R15 = state[15]; ++ for (int r = 0; r != 10; ++r) ++ { ++ R09 += R13; ++ R11 += R15; ++ R05 ^= R09; ++ R06 ^= R10; ++ R07 ^= R11; ++ R07 = R07.rotl (7); ++ R00 += R05; ++ R01 += R06; ++ R02 += R07; ++ R15 ^= R00; ++ R12 ^= R01; ++ R13 ^= R02; ++ R00 += R05; ++ R01 += R06; ++ R02 += R07; ++ R15 ^= R00; ++ R12 = R12.rotl (8); ++ R13 = R13.rotl (8); ++ R10 += R15; ++ R11 += R12; ++ R08 += R13; ++ R09 += R14; ++ R05 ^= R10; ++ R06 ^= R11; ++ R07 ^= R08; ++ R05 = R05.rotl (7); ++ R06 = R06.rotl (7); ++ R07 = R07.rotl (7); ++ } ++ R00 += state[0]; ++ S::transpose (R00, R01, R02, R03); ++ R00.store_le (output); ++} ++ ++unsigned int res[1]; ++unsigned main_state[]{1634760805, 60878, 2036477234, 6, ++ 0, 825562964, 1471091955, 1346092787, ++ 506976774, 4197066702, 518848283, 118491664, ++ 0, 0, 0, 0}; ++int ++main () ++{ ++ foo (res, main_state); ++ if (res[0] != 0x41fcef98) ++ __builtin_abort (); ++ return 0; ++} +diff --git 
a/gcc/testsuite/gcc.target/powerpc/pr115355.c b/gcc/testsuite/gcc.target/powerpc/pr115355.c +new file mode 100644 +index 00000000000..8955126b808 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/powerpc/pr115355.c +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target p9vector_hw } */ ++/* Force vectorization with -fno-vect-cost-model to have vector unpack ++ which exposes the issue in PR115355. */ ++/* { dg-options "-O2 -mdejagnu-cpu=power9 -fno-vect-cost-model" } */ ++ ++/* Verify it runs successfully. */ ++ ++__attribute__((noipa)) ++void setToIdentityGOOD(unsigned long long *mVec, unsigned int mLen) ++{ ++ #pragma GCC novector ++ for (unsigned int i = 0; i < mLen; i++) ++ mVec[i] = i; ++} ++ ++__attribute__((noipa)) ++void setToIdentityBAD(unsigned long long *mVec, unsigned int mLen) ++{ ++ for (unsigned int i = 0; i < mLen; i++) ++ mVec[i] = i; ++} ++ ++unsigned long long vec1[100]; ++unsigned long long vec2[100]; ++ ++int main() ++{ ++ unsigned int l = 29; ++ setToIdentityGOOD (vec1, 29); ++ setToIdentityBAD (vec2, 29); ++ ++ if (__builtin_memcmp (vec1, vec2, l * sizeof (vec1[0])) != 0) ++ __builtin_abort (); ++ ++ return 0; ++} diff --git a/gcc12-vector-merge-2.patch b/gcc12-vector-merge-2.patch new file mode 100644 index 0000000000000000000000000000000000000000..974f6e1b862bb2f71b352c3d697e494617a5b3d3 --- /dev/null +++ b/gcc12-vector-merge-2.patch @@ -0,0 +1,240 @@ +commit 13f0528c782c3732052973a5d340769af8182c8f +Author: Kewen Lin +Date: Wed Jun 26 02:16:17 2024 -0500 + + rs6000: Fix wrong RTL patterns for vector merge high/low char on LE + + Commit r12-4496 changes some define_expands and define_insns + for vector merge high/low char, which are altivec_vmrg[hl]b. + These defines are mainly for built-in function vec_merge{h,l} + and some internal gen function needs. 
These functions should + consider endianness. Taking vec_mergeh as an example, PVIPR + defines it as "Merges the first halves (in element order) + of two vectors"; note that it says element order. So it's + mapped into vmrghb on BE and vmrglb on LE respectively. + Although the mapped insns are different, as discussed in + PR106069, the RTL pattern should still be the same. This + held before commit r12-4496, but got changed into + different patterns on BE and LE starting from commit r12-4496. + Similar to the 32-bit element case in the commit log of r15-1504, this + 8-bit element pattern on LE doesn't actually match what the + underlying insn is intended to represent; once some optimization + like combine makes changes based on it, it causes + unexpected results. The newly constructed test case + pr106069-1.c is a typical example of this issue. + + So this patch fixes the wrong RTL pattern, ensuring the + associated RTL patterns become the same as before, with + the same semantics as their mapped insns. With the + proposed patch, expanders like altivec_vmrghb expand + into altivec_vmrghb_direct_be or altivec_vmrglb_direct_le + depending on endianness; "direct" shows which + insn would be generated, while _be and _le distinguish the + different RTL patterns per endianness. + + Co-authored-by: Xionghu Luo + + PR target/106069 + PR target/115355 + + gcc/ChangeLog: + + * config/rs6000/altivec.md (altivec_vmrghb_direct): Rename to ... + (altivec_vmrghb_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrghb_direct_le): New define_insn. + (altivec_vmrglb_direct): Rename to ... + (altivec_vmrglb_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrglb_direct_le): New define_insn. + (altivec_vmrghb): Adjust by calling gen_altivec_vmrghb_direct_be + for BE and gen_altivec_vmrglb_direct_le for LE.
+ (altivec_vmrglb): Adjust by calling gen_altivec_vmrglb_direct_be + for BE and gen_altivec_vmrghb_direct_le for LE. + * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace + CODE_FOR_altivec_vmrghb_direct by + CODE_FOR_altivec_vmrghb_direct_be for BE and + CODE_FOR_altivec_vmrghb_direct_le for LE. And replace + CODE_FOR_altivec_vmrglb_direct by + CODE_FOR_altivec_vmrglb_direct_be for BE and + CODE_FOR_altivec_vmrglb_direct_le for LE. + + gcc/testsuite/ChangeLog: + + * gcc.target/powerpc/pr106069-1.c: New test. + + (cherry picked from commit 62520e4e9f7e2fe8a16ee57a4bd35da2e921ae22) + +diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md +index 0c408a9e839..b8baae679c4 100644 +--- a/gcc/config/rs6000/altivec.md ++++ b/gcc/config/rs6000/altivec.md +@@ -1152,15 +1152,16 @@ (define_expand "altivec_vmrghb" + (use (match_operand:V16QI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghb_direct +- : gen_altivec_vmrglb_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrghb_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrglb_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrghb_direct" ++(define_insn "altivec_vmrghb_direct_be" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_select:V16QI + (vec_concat:V32QI +@@ -1174,7 +1175,25 @@ (define_insn "altivec_vmrghb_direct" + (const_int 5) (const_int 21) + (const_int 6) (const_int 22) + (const_int 7) (const_int 23)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrghb %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrghb_direct_le" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (vec_select:V16QI ++ (vec_concat:V32QI ++ 
(match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 1 "register_operand" "v")) ++ (parallel [(const_int 8) (const_int 24) ++ (const_int 9) (const_int 25) ++ (const_int 10) (const_int 26) ++ (const_int 11) (const_int 27) ++ (const_int 12) (const_int 28) ++ (const_int 13) (const_int 29) ++ (const_int 14) (const_int 30) ++ (const_int 15) (const_int 31)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrghb %0,%1,%2" + [(set_attr "type" "vecperm")]) + +@@ -1274,15 +1293,16 @@ (define_expand "altivec_vmrglb" + (use (match_operand:V16QI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglb_direct +- : gen_altivec_vmrghb_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrglb_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrghb_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrglb_direct" ++(define_insn "altivec_vmrglb_direct_be" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_select:V16QI + (vec_concat:V32QI +@@ -1296,7 +1316,25 @@ (define_insn "altivec_vmrglb_direct" + (const_int 13) (const_int 29) + (const_int 14) (const_int 30) + (const_int 15) (const_int 31)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrglb %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrglb_direct_le" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (vec_select:V16QI ++ (vec_concat:V32QI ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 1 "register_operand" "v")) ++ (parallel [(const_int 0) (const_int 16) ++ (const_int 1) (const_int 17) ++ (const_int 2) (const_int 18) ++ (const_int 3) (const_int 19) ++ (const_int 4) (const_int 20) ++ (const_int 5) (const_int 21) ++ (const_int 6) (const_int 22) 
++ (const_int 7) (const_int 23)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrglb %0,%1,%2" + [(set_attr "type" "vecperm")]) + +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index 23b553131a9..e8ce629182b 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ b/gcc/config/rs6000/rs6000.cc +@@ -22971,8 +22971,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + CODE_FOR_altivec_vpkuwum_direct, + {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct +- : CODE_FOR_altivec_vmrglb_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct_be ++ : CODE_FOR_altivec_vmrglb_direct_le, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct +@@ -22983,8 +22983,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrglw_direct_v4si_le, + {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct +- : CODE_FOR_altivec_vmrghb_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct_be ++ : CODE_FOR_altivec_vmrghb_direct_le, + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct +diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-1.c b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c +new file mode 100644 +index 00000000000..4945d8fedfb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/powerpc/pr106069-1.c +@@ -0,0 +1,39 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target vmx_hw } */ ++ ++/* Test vector merge for 8-bit element size, ++ it will abort if the RTL pattern isn't expected. 
*/ ++ ++#include "altivec.h" ++ ++__attribute__((noipa)) ++signed char elem_6 (vector signed char a, vector signed char b) ++{ ++ vector signed char c = vec_mergeh (a,b); ++ return vec_extract (c, 6); ++} ++ ++__attribute__((noipa)) ++unsigned char elem_15 (vector unsigned char a, vector unsigned char b) ++{ ++ vector unsigned char c = vec_mergel (a,b); ++ return vec_extract (c, 15); ++} ++ ++int ++main () ++{ ++ vector unsigned char v1 ++ = {3, 33, 22, 12, 34, 14, 5, 25, 30, 11, 0, 21, 17, 27, 38, 8}; ++ vector unsigned char v2 ++ = {81, 82, 83, 84, 68, 67, 66, 65, 99, 100, 101, 102, 250, 125, 0, 6}; ++ signed char x1 = elem_6 ((vector signed char) v1, (vector signed char) v2); ++ unsigned char x2 = elem_15 (v1, v2); ++ ++ if (x1 != 12 || x2 != 6) ++ __builtin_abort (); ++ ++ return 0; ++} ++ diff --git a/gcc12-vector-merge-3.patch b/gcc12-vector-merge-3.patch new file mode 100644 index 0000000000000000000000000000000000000000..0360b1dcf4747695f66747ae4e2b2cbd0a820713 --- /dev/null +++ b/gcc12-vector-merge-3.patch @@ -0,0 +1,306 @@ +commit ca6eea0eb33de8b2e23e0bef3466575bb14ab63f +Author: Kewen Lin +Date: Wed Jun 26 02:16:17 2024 -0500 + + rs6000: Fix wrong RTL patterns for vector merge high/low short on LE + + Commit r12-4496 changes some define_expands and define_insns + for vector merge high/low short, which are altivec_vmrg[hl]h. + These defines are mainly for built-in function vec_merge{h,l} + and some internal gen function needs. These functions should + consider endianness, taking vec_mergeh as example, as PVIPR + defines, vec_mergeh "Merges the first halves (in element order) + of two vectors", it does note it's in element order. So it's + mapped into vmrghh on BE while vmrglh on LE respectively. + Although the mapped insns are different, as the discussion in + PR106069, the RTL pattern should be still the same, it is + conformed before commit r12-4496, but gets changed into + different patterns on BE and LE starting from commit r12-4496. 
+ Similar to the 32-bit element case in the commit log of r15-1504, this + 16-bit element pattern on LE doesn't actually match what the + underlying insn is intended to represent; once some optimization + like combine makes changes based on it, it causes + unexpected results. The newly constructed test case + pr106069-2.c is a typical example of this issue on element type + short. + + So this patch fixes the wrong RTL pattern, ensuring the + associated RTL patterns become the same as before, with + the same semantics as their mapped insns. With the + proposed patch, expanders like altivec_vmrghh expand + into altivec_vmrghh_direct_be or altivec_vmrglh_direct_le + depending on endianness; "direct" shows which + insn would be generated, while _be and _le distinguish the + different RTL patterns per endianness. + + Co-authored-by: Xionghu Luo + + PR target/106069 + PR target/115355 + + gcc/ChangeLog: + + * config/rs6000/altivec.md (altivec_vmrghh_direct): Rename to ... + (altivec_vmrghh_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrghh_direct_le): New define_insn. + (altivec_vmrglh_direct): Rename to ... + (altivec_vmrglh_direct_be): ... this. Add condition BYTES_BIG_ENDIAN. + (altivec_vmrglh_direct_le): New define_insn. + (altivec_vmrghh): Adjust by calling gen_altivec_vmrghh_direct_be + for BE and gen_altivec_vmrglh_direct_le for LE. + (altivec_vmrglh): Adjust by calling gen_altivec_vmrglh_direct_be + for BE and gen_altivec_vmrghh_direct_le for LE. + (vec_widen_umult_hi_v16qi): Adjust the call to + gen_altivec_vmrghh_direct by gen_altivec_vmrghh for BE + and by gen_altivec_vmrglh for LE. + (vec_widen_smult_hi_v16qi): Likewise. + (vec_widen_umult_lo_v16qi): Adjust the call to + gen_altivec_vmrglh_direct by gen_altivec_vmrglh for BE + and by gen_altivec_vmrghh for LE. + (vec_widen_smult_lo_v16qi): Likewise.
+ * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Replace + CODE_FOR_altivec_vmrghh_direct by + CODE_FOR_altivec_vmrghh_direct_be for BE and + CODE_FOR_altivec_vmrghh_direct_le for LE. And replace + CODE_FOR_altivec_vmrglh_direct by + CODE_FOR_altivec_vmrglh_direct_be for BE and + CODE_FOR_altivec_vmrglh_direct_le for LE. + + gcc/testsuite/ChangeLog: + + * gcc.target/powerpc/pr106069-2.c: New test. + + (cherry picked from commit 812c70bf4981958488331d4ea5af8709b5321da1) + +diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md +index b8baae679c4..50689e418ed 100644 +--- a/gcc/config/rs6000/altivec.md ++++ b/gcc/config/rs6000/altivec.md +@@ -1203,17 +1203,18 @@ (define_expand "altivec_vmrghh" + (use (match_operand:V8HI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghh_direct +- : gen_altivec_vmrglh_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrghh_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrglh_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrghh_direct" ++(define_insn "altivec_vmrghh_direct_be" + [(set (match_operand:V8HI 0 "register_operand" "=v") +- (vec_select:V8HI ++ (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) +@@ -1221,7 +1222,21 @@ (define_insn "altivec_vmrghh_direct" + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrghh %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrghh_direct_le" ++ [(set (match_operand:V8HI 0 "register_operand" "=v") ++ (vec_select:V8HI ++ (vec_concat:V16HI ++ (match_operand:V8HI 2 
"register_operand" "v") ++ (match_operand:V8HI 1 "register_operand" "v")) ++ (parallel [(const_int 4) (const_int 12) ++ (const_int 5) (const_int 13) ++ (const_int 6) (const_int 14) ++ (const_int 7) (const_int 15)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrghh %0,%1,%2" + [(set_attr "type" "vecperm")]) + +@@ -1344,15 +1359,16 @@ (define_expand "altivec_vmrglh" + (use (match_operand:V8HI 2 "register_operand"))] + "TARGET_ALTIVEC" + { +- rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrglh_direct +- : gen_altivec_vmrghh_direct; +- if (!BYTES_BIG_ENDIAN) +- std::swap (operands[1], operands[2]); +- emit_insn (fun (operands[0], operands[1], operands[2])); ++ if (BYTES_BIG_ENDIAN) ++ emit_insn ( ++ gen_altivec_vmrglh_direct_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn ( ++ gen_altivec_vmrghh_direct_le (operands[0], operands[2], operands[1])); + DONE; + }) + +-(define_insn "altivec_vmrglh_direct" ++(define_insn "altivec_vmrglh_direct_be" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (vec_select:V8HI + (vec_concat:V16HI +@@ -1362,7 +1378,21 @@ (define_insn "altivec_vmrglh_direct" + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] +- "TARGET_ALTIVEC" ++ "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" ++ "vmrglh %0,%1,%2" ++ [(set_attr "type" "vecperm")]) ++ ++(define_insn "altivec_vmrglh_direct_le" ++ [(set (match_operand:V8HI 0 "register_operand" "=v") ++ (vec_select:V8HI ++ (vec_concat:V16HI ++ (match_operand:V8HI 2 "register_operand" "v") ++ (match_operand:V8HI 1 "register_operand" "v")) ++ (parallel [(const_int 0) (const_int 8) ++ (const_int 1) (const_int 9) ++ (const_int 2) (const_int 10) ++ (const_int 3) (const_int 11)])))] ++ "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" + "vmrglh %0,%1,%2" + [(set_attr "type" "vecperm")]) + +@@ -3777,13 +3807,13 @@ (define_expand "vec_widen_umult_hi_v16qi" + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn 
(gen_altivec_vmuloub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + DONE; + }) +@@ -3802,13 +3832,13 @@ (define_expand "vec_widen_umult_lo_v16qi" + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + DONE; + }) +@@ -3827,13 +3857,13 @@ (define_expand "vec_widen_smult_hi_v16qi" + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } + DONE; + }) +@@ -3852,13 +3882,13 @@ (define_expand "vec_widen_smult_lo_v16qi" + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo)); ++ emit_insn (gen_altivec_vmrglh (operands[0], ve, 
vo)); + } + else + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); +- emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve)); ++ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } + DONE; + }) +diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc +index e8ce629182b..34be43c9f84 100644 +--- a/gcc/config/rs6000/rs6000.cc ++++ b/gcc/config/rs6000/rs6000.cc +@@ -22975,8 +22975,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrglb_direct_le, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct +- : CODE_FOR_altivec_vmrglh_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct_be ++ : CODE_FOR_altivec_vmrglh_direct_le, + {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be +@@ -22987,8 +22987,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + : CODE_FOR_altivec_vmrghb_direct_le, + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}}, + {OPTION_MASK_ALTIVEC, +- BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct +- : CODE_FOR_altivec_vmrghh_direct, ++ BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct_be ++ : CODE_FOR_altivec_vmrghh_direct_le, + {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}}, + {OPTION_MASK_ALTIVEC, + BYTES_BIG_ENDIAN ? 
CODE_FOR_altivec_vmrglw_direct_v4si_be +diff --git a/gcc/testsuite/gcc.target/powerpc/pr106069-2.c b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c +new file mode 100644 +index 00000000000..283e3290fb3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/powerpc/pr106069-2.c +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target vmx_hw } */ ++ ++/* Test vector merge for 16-bit element size, ++ it will abort if the RTL pattern isn't expected. */ ++ ++#include "altivec.h" ++ ++__attribute__((noipa)) ++signed short elem_2 (vector signed short a, vector signed short b) ++{ ++ vector signed short c = vec_mergeh (a,b); ++ return vec_extract (c, 2); ++} ++ ++__attribute__((noipa)) ++unsigned short elem_7 (vector unsigned short a, vector unsigned short b) ++{ ++ vector unsigned short c = vec_mergel (a,b); ++ return vec_extract (c, 7); ++} ++ ++int ++main () ++{ ++ vector unsigned short v1 = {3, 22, 12, 34, 5, 25, 30, 11}; ++ vector unsigned short v2 = {84, 168, 267, 966, 65, 399, 999, 99}; ++ signed short x1 = elem_2 ((vector signed short) v1, (vector signed short) v2); ++ unsigned short x2 = elem_7 (v1, v2); ++ ++ if (x1 != 22 || x2 != 99) ++ __builtin_abort (); ++ ++ return 0; ++} ++